ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1591985 [1/2] - in /ctakes/sandbox/dictionarytool: data/default/ doc/ example/ example/org/ example/org/apache/ example/org/apache/ctakes/ example/org/apache/ctakes/dictionarytool/ src/org/apache/ctakes/dictionarytool/ src/org/apache/ctake...
Date Fri, 02 May 2014 17:47:45 GMT
Author: seanfinan
Date: Fri May  2 17:47:44 2014
New Revision: 1591985

URL: http://svn.apache.org/r1591985
Log:
Tiny HowTo doc
Minor Updates
goofy examples, database templates, etc.

Added:
    ctakes/sandbox/dictionarytool/data/default/ConversionSources.txt   (with props)
    ctakes/sandbox/dictionarytool/data/default/UmlsAllSources.txt   (with props)
    ctakes/sandbox/dictionarytool/doc/
    ctakes/sandbox/dictionarytool/doc/howto.txt   (with props)
    ctakes/sandbox/dictionarytool/example/
    ctakes/sandbox/dictionarytool/example/org/
    ctakes/sandbox/dictionarytool/example/org/apache/
    ctakes/sandbox/dictionarytool/example/org/apache/ctakes/
    ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/
    ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/ChronicDiseaseLister.java   (with props)
    ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/UmlsSourceNamesPrinter.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/RelationsCreator.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiRelationsMapReader.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsRelationsForCuisReader.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/UmlsFileName.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/index/
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/index/MrconsoIndex.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/index/MrrelIndex.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/index/MrstyIndex.java   (with props)
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiCodesMapWriter.java   (with props)
Modified:
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiTextsMapReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/OrangebookReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SemanticTypeListReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SourceTypeListReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/TuiListReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTextsReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTuisReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsSemanticTypeTuiReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTuisForCuisReader.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CreatorProperties.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiTuiUtil.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/FileUtil.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/JdbcUtil.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/RareWordUtil.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/TokenUtil.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/UmlsSourceTypeCuiValidator.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/UmlsTermUtil.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/CuiTextsMapWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/FirstWordDbWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/RareWordDbWriter.java
    ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/writer/TuiListWriter.java

Added: ctakes/sandbox/dictionarytool/data/default/ConversionSources.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/data/default/ConversionSources.txt?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/data/default/ConversionSources.txt (added)
+++ ctakes/sandbox/dictionarytool/data/default/ConversionSources.txt Fri May  2 17:47:44 2014
@@ -0,0 +1,4 @@
+ICD10PCS
+ICD9CM
+RXNORM
+SNOMEDCT

Propchange: ctakes/sandbox/dictionarytool/data/default/ConversionSources.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/data/default/UmlsAllSources.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/data/default/UmlsAllSources.txt?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/data/default/UmlsAllSources.txt (added)
+++ ctakes/sandbox/dictionarytool/data/default/UmlsAllSources.txt Fri May  2 17:47:44 2014
@@ -0,0 +1,62 @@
+AIR
+AOD
+AOT
+CCS
+CHV
+COSTAR
+CSP
+CST
+DXP
+FMA
+GO
+HCPCS
+HL7V2.5
+HL7V3.0
+HUGO
+ICD10PCS
+ICD9CM
+ICPC
+ICPCBAQ
+ICPCDAN
+ICPCDUT
+ICPCFIN
+ICPCFRE
+ICPCGER
+ICPCHEB
+ICPCHUN
+ICPCITA
+ICPCNOR
+ICPCPOR
+ICPCSPA
+ICPCSWE
+LCH
+LNC
+MCM
+MEDLINEPLUS
+MSH
+MTH
+MTHCH
+MTHFDA
+MTHHH
+MTHHL7V2.5
+MTHICD9
+MTHMST
+MTHMSTFRE
+MTHMSTITA
+MTHSPL
+NCBI
+NCI
+NDFRT
+OMIM
+PDQ
+QMR
+RAM
+RXNORM
+SCTSPA
+SNOMEDCT
+SPN
+SRC
+TKMT
+USPMG
+UWDA
+VANDF

Propchange: ctakes/sandbox/dictionarytool/data/default/UmlsAllSources.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/doc/howto.txt
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/doc/howto.txt?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/doc/howto.txt (added)
+++ ctakes/sandbox/dictionarytool/doc/howto.txt Fri May  2 17:47:44 2014
@@ -0,0 +1,59 @@
+>java -cp DictionaryTool.jar org.apache.ctakes.dictionarytool.DictionaryCreator
+
+Dictionary Creator: Creates a flat file Cui|Text or Database Dictionary from UMLS and Orangebook
+Database Dictionary can be indexed by each Text's First Word or Rarest Word (for the dictionary)
+Minimal Usage: DictionaryCreator -umls pathToUmlsRoot -ol pathToFlatFileOutput
+
+-fw             Create First Word Index
+-umls           Umls Root Directory
+-ob             Orangebook Path
+-fd             Format Data Directory
+-tui            Input Tui List Path
+-src            Source Type List Path
+-ol             Output Cui and Term List Path
+-db             Output Database Url
+-tbl            Output Database Table
+
+The UMLS Root Directory must be specified
+One form of output must be specified: either -ol (flat file), or both -db and -tbl (database)
+The default index type for databases is Rare Word Index
+If an Orangebook Path is not specified then (orangebook) medication terms are not written
+If a Format Data Directory is not specified then the default is used: ./data/default
+If an Input Tui List Path is not specified then the cTakes Tuis are used: ./data/default/CtakesAllTuis.txt
+If a Source Type List Path is not specified then Snomed is used: ./data/default/CtakesSources.txt
+
+Important: Dictionary entries are appended to the output file or database.  
+Running the same command twice will result in a database with all terms existing twice.
+
+The data/default/ directory does include non-default possibilities, such as files listing only single cTakes groups:
+e.g. CtakesAnatTuis.txt
+and all UMLS groups:
+UmlsAllTuis.txt
+that can be used with the option -tui ./data/default/UmlsAllTuis.txt
+
+There is also a file with all UMLS sources:
+UmlsAllSources.txt
+that can be used with the option -src ./data/default/UmlsAllSources.txt
+
+Remember that if you want to output to a database you must specify both the url and table name:
+-db jdbc:hsqldb:file:pathToMyDatabase -tbl myTableName
+
+Also remember that hsqldb requires the entire url to be lowercase.
+
+"Format Data" refers to the data that is used to format the end-result dictionary by trimming or expanding the umls entries.
+It is recommended that the defaults are used, but you are welcome to experiment with your own.
+
+
+
+There are a few other toys that can be found by perusing the source, such as a tool that creates a mapping of codes 
+for like terms in different dictionaries:
+ICD10|ICD9|RXNORM|SNOMEDCT
+Usage: java -cp DictionaryTool.jar org.apache.ctakes.dictionarytool.CodeMapCreator -umls pathToUmlsRoot -ol pathToFlatFileOutput
+
+Some of these extra utilities may be experimental or unfinished, so user beware.
+
+
+
+At this time the code could use some javadocs and unit tests, plus a little cleanup.  I'm very busy, so volunteer work is appreciated.
+
+Enjoy
\ No newline at end of file

Propchange: ctakes/sandbox/dictionarytool/doc/howto.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/ChronicDiseaseLister.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/ChronicDiseaseLister.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/ChronicDiseaseLister.java (added)
+++ ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/ChronicDiseaseLister.java Fri May  2 17:47:44 2014
@@ -0,0 +1,288 @@
+package org.apache.ctakes.dictionarytool;
+
+import org.apache.ctakes.dictionarytool.reader.CuiRelationsMapReader;
+import org.apache.ctakes.dictionarytool.reader.CuiRelationsMapReader.CuiRelations;
+import org.apache.ctakes.dictionarytool.reader.SourceTypeListReader;
+import org.apache.ctakes.dictionarytool.reader.TuiListReader;
+import org.apache.ctakes.dictionarytool.reader.UmlsCuisForTuisReader;
+import org.apache.ctakes.dictionarytool.util.CreatorProperties;
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.FileUtil;
+import org.apache.ctakes.dictionarytool.util.UmlsSourceTypeCuiValidator;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.SOURCE;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TERM_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TUI_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.UMLS_ROOT;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TERM_MAP;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TUI_MAP;
+
+
+/**
+ * Given a file created by {@link org.apache.ctakes.dictionarytool.RelationsCreator},
+ * writes a file listing chronic diseases.
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/27/14
+ */
+final public class ChronicDiseaseLister {
+
+   static private final Logger LOGGER = Logger.getLogger( "ChronicDiseaseLister" );
+
+   // Cuis for the top 15 most common chronic diseases according to the Centers for Disease Control and Prevention
+   static private enum CdcTopFifteen {
+      HYPERTENSION("C0020538"),
+      HIGH_CHOLESTEROL("C0020443"),
+      ISCHEMIC_HEART_DISEASE("C0151744"),
+      ARTHRITIS("C0263680","C1533862","C0003864","C1971754","C0559169"),
+      DIABETES("C0011849"),
+      HEART_FAILURE("C0264716"),
+      KIDNEY_DISEASE("C0403447","C0022661"),
+      DEPRESSION("C0581391"),
+      CHRONIC_OBSTRUCTIVE_PULMONARY_DISEASE("C0024117"),
+      ALZHEIMERS("C0002395"),
+      ATRIAL_FIBRILLATION("C0694539","C0729790","C1542750","C0004238"),
+      CANCER("C1306459","C0006826","C0455471"),
+      OSTEOPOROSIS("C0029694","C0029456"),
+      ASTHMA("C0004096","C1272273","C1525059","C0455544"),
+      STROKE("C1540598","C0038454","C0559159");
+      final private Collection<String> __cuis;
+      private CdcTopFifteen( final String... cuis ) {
+         __cuis = new HashSet<String>( Arrays.asList( cuis ) );
+      }
+   }
+
+   // Cuis for top chronic diseases covered by medicare and medicaid that aren't in the CDC top 15
+   // https://www.ccwdata.org/web/guest/condition-categories
+   static private enum MedicareMedicaid {
+      HYPERTHYROIDISM("C0020550","C0455485"),
+      // actually listed as "acute myocardial infarction"
+      MYOCARDIAL_INFARCTION("C0027051"),
+      ANEMIA("C0475143","C0029744","C0002871","C1534443","C0559154"),
+      // actually listed as "benign prostatic hyperplasia"
+      PROSTATIC_HYPERPLASIA("C1704272","C0005001"),
+      CATARACT("C0029531","C0344264","C0086543","C0455518","C1690964"),
+      GLAUCOMA("C0029802","C0017601","C0348555","C0455517"),
+      HIP_FRACTURE("C0019557","C1540982","C1540983","C1272069"),
+      PELVIC_FRACTURE("C0149531"),
+      HYPERLIPIDEMIA("C0348494","C0020473");
+      final private Collection<String> __cuis;
+      private MedicareMedicaid( final String... cuis ) {
+         __cuis = new HashSet<String>( Arrays.asList( cuis ) );
+      }
+   }
+
+//   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\Data\\UMLS\\2011AB\\META",
+//                                               "-rel", "C:/Spiffy/Output/DictionaryToolTest/DisoRelations.bsv",
+//                                               "-tui", "data/default/CtakesDisoTuis.txt",
+//                                               "-ol", "C:/Spiffy/Output/DictionaryToolTest/ChronicDiseasesAll.bsv"};
+
+   // according to http://aclweb.org/anthology/N/N13/N13-3007.pdf  the only difference between parent/child and
+   // broader/narrower is that parent/child came from the original source, whereas broader/narrower was added by
+   // the UMLS editors.  Still, it may be prudent to separate the two.
+   static private final boolean WRITE_CHILD = true;
+   static private final boolean WRITE_NARROW = true;
+
+   public static void main( final String[] args ) {
+      final String relationsFilePath = getRelationsFilePath( args );
+//      final String relationsFilePath = getRelationsFilePath( DEBUG_ARGS );
+      if ( relationsFilePath == null || relationsFilePath.isEmpty() ) {
+         System.out.println( "Please enter a path to the relations file with the parameter name -rel" );
+         System.exit( 0 );
+      }
+      // Set properties (user options) used to create the dictionary
+      final CreatorProperties properties = new CreatorProperties( args );
+//      final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
+      if ( !TERM_LIST.hasValue() ) {
+         LOGGER.severe( "Please enter a valid output path with the parameter name " + TERM_LIST.getKey() );
+         System.exit( 1 );
+      }
+      // Read wanted Sources
+      final Collection<String> wantedSources = SourceTypeListReader.readSourceTypes( SOURCE.getValue() );
+      // Read wanted Tuis
+      final Collection<String> wantedTuis = TuiListReader.readTuiList( TUI_LIST.getValue() );
+      if ( wantedTuis == null || wantedTuis.isEmpty() ) {
+         LOGGER.severe( "No valid TUI codes found in " + TUI_LIST.getValue() );
+         System.exit( 1 );
+      }
+      // get the valid Cuis for all wanted Tuis
+      final Map<String,Collection<String>> diseaseCuisAndTuis
+            = CuiTuiUtil.getValidCuisAndTuis( UMLS_ROOT.getValue(), wantedSources, wantedTuis );
+      if ( diseaseCuisAndTuis == null || diseaseCuisAndTuis.isEmpty() ) {
+         LOGGER.severe( "No valid CUI codes found" );
+         System.exit( 1 );
+      }
+      final Collection<String> congenitalCuis = getCongenitalCuis( diseaseCuisAndTuis );
+      writeCuiTextList( relationsFilePath, diseaseCuisAndTuis.keySet(), congenitalCuis );
+   }
+
+   static private String getRelationsFilePath( final String... args ) {
+      for ( int i=0; i<args.length-1; i++ ) {
+         if ( args[i].equalsIgnoreCase( "-rel" ) ) {
+            return args[i+1];
+         }
+      }
+      return "";
+   }
+
+   static private Collection<String> getCongenitalCuis( final Map<String, Collection<String>> validCuisAndTuis ) {
+      // Tui T019 is for Congenital Abnormality
+      // Some 99% of Congenital conditions, though possibly treatable, are chronic
+      final Collection<String> congenitalCuis = new HashSet<String>();
+      for ( Map.Entry<String,Collection<String>> entry : validCuisAndTuis.entrySet() ) {
+         if ( entry.getValue().contains( "T019" ) ) {
+            congenitalCuis.add( entry.getKey() );
+         }
+      }
+      return congenitalCuis;
+   }
+
+   static private void writeCuiTextList( final String relationsFilePath,
+                                         final Collection<String> diseaseCuis,
+                                         final Collection<String> congenitalCuis ) {
+      final Collection<String> usedCuis = new HashSet<String>();
+      final Map<String,CuiRelations> cuiRelationsMap = CuiRelationsMapReader.readCuiRelationsMap( relationsFilePath );
+      final String outputPath = TERM_LIST.getValue();
+      System.out.println( "Writing list of Cuis and Preferred Text to " + outputPath );
+      int fakeCui = 0;
+      try {
+         final BufferedWriter writer = FileUtil.createWriter( outputPath );
+         writer.write( "C" + fakeCui + "|Centers For Disease Prevention and Control Top 15 Chronic Diseases" );
+         writer.newLine();
+         fakeCui++;
+         for ( CdcTopFifteen cdcTopFifteen : CdcTopFifteen.values() ) {
+            writer.write( "C" + fakeCui + "|" + formatEnumName( cdcTopFifteen.name() ) );
+            writer.newLine();
+            for ( String cui : cdcTopFifteen.__cuis ) {
+               writeTree( writer, cui, 1, cuiRelationsMap, usedCuis, WRITE_CHILD, WRITE_NARROW );
+            }
+            fakeCui++;
+         }
+         writer.write( "C" + fakeCui + "|Medicare / Medicaid covered additions" );
+         writer.newLine();
+         fakeCui++;
+         for ( MedicareMedicaid medicareMedicaid : MedicareMedicaid.values() ) {
+            writer.write( "C" + fakeCui + "|" + formatEnumName( medicareMedicaid.name() ) );
+            writer.newLine();
+            for ( String cui : medicareMedicaid.__cuis ) {
+               writeTree( writer, cui, 1, cuiRelationsMap, usedCuis, WRITE_CHILD, WRITE_NARROW );
+            }
+            fakeCui++;
+         }
+         writer.write( "C" + fakeCui + "|Congenital Abnormality additions" );
+         writer.newLine();
+         fakeCui++;
+         for ( String cui : congenitalCuis ) {
+            writeTree( writer, cui, 1, cuiRelationsMap, usedCuis, WRITE_CHILD, WRITE_NARROW );
+         }
+         writer.write( "C" + fakeCui + "|Chronic Prefix additions" );
+         writer.newLine();
+         for ( String cui : diseaseCuis ) {
+            if ( usedCuis.contains( cui ) ) {
+               continue;
+            }
+            final CuiRelations cuiRelations = cuiRelationsMap.get( cui );
+            if ( cuiRelations == null ) {
+               continue;
+            }
+            if ( cuiRelations.__text.contains( "chronic" ) ) {
+               writeTree( writer, cui, 1, cuiRelationsMap, usedCuis, WRITE_CHILD, WRITE_NARROW );
+            }
+         }
+         writer.close();
+      } catch ( IOException ioE ) {
+         System.err.println( "Error writing Relations on line " + usedCuis.size() + " in file " + outputPath );
+      }
+      System.out.println( "Wrote " + usedCuis.size() + " relations to " + outputPath );
+   }
+
+   static private void writeTree( final BufferedWriter writer, final String cui, final int level,
+                                                final Map<String,CuiRelations> cuiRelationsMap,
+                                                final Collection<String> usedCuis,
+                                                final boolean writeChild,
+                                                final boolean writeNarrower ) throws IOException {
+      if ( usedCuis.contains( cui ) ) {
+         return;
+      }
+      final CuiRelations cuiRelations = cuiRelationsMap.get( cui );
+      if ( cuiRelations == null ) {
+         return;
+      }
+      usedCuis.add( cui );
+      final StringBuilder sb = new StringBuilder();
+      sb.append( cui ).append( "|" );
+      for ( int i=0; i<level; i++ ) {
+         sb.append( "\t" );
+      }
+      sb.append( cuiRelations.__text );
+      writer.write( sb.toString() );
+      writer.newLine();
+      if ( writeChild ) {
+         writeChildren(  writer, cuiRelations.__children, level + 1, cuiRelationsMap, usedCuis, true, writeNarrower );
+      }
+      if ( writeNarrower ) {
+         writeChildren(  writer, cuiRelations.__narrowers, level + 1, cuiRelationsMap, usedCuis, writeChild, true );
+      }
+      for ( String synonymCui : cuiRelations.__synonyms ) {
+         writeTree( writer, synonymCui, level, cuiRelationsMap, usedCuis, writeChild, writeNarrower );
+      }
+   }
+
+   static private void writeChildren(  final BufferedWriter writer, final Collection<String> children, final int level,
+                                       final Map<String,CuiRelations> cuiRelationsMap,
+                                       final Collection<String> usedCuis,
+                                       final boolean writeChild,
+                                       final boolean writeNarrower ) throws IOException {
+      final List<String> sortedChildren = new ArrayList<String>( children );
+      Collections.sort( sortedChildren );
+      for ( String childCui : sortedChildren ) {
+         // if the child is a CDC or MEDI* root then don't use it yet
+         boolean isCdcCui = false;
+         for ( CdcTopFifteen cdcTopFifteen : CdcTopFifteen.values() ) {
+            isCdcCui = cdcTopFifteen.__cuis.contains( childCui );
+            if ( isCdcCui ) {
+               break;
+            }
+         }
+         if ( isCdcCui ) {
+            continue;
+         }
+         boolean isMediCui = false;
+         for ( MedicareMedicaid medicareMedicaid : MedicareMedicaid.values() ) {
+            isMediCui = medicareMedicaid.__cuis.contains( childCui );
+            if ( isMediCui ) {
+               break;
+            }
+         }
+         if ( isMediCui ) {
+            continue;
+         }
+         writeTree( writer, childCui, level, cuiRelationsMap, usedCuis, writeChild, writeNarrower );
+      }
+   }
+
+   static private String formatEnumName( final String name ) {
+      final String[] splits = name.split( "_" );
+      final StringBuilder sb = new StringBuilder();
+      for ( String split : splits ) {
+         sb.append( split.charAt( 0 ) );
+         sb.append( split.substring( 1, split.length() ).toLowerCase() );
+         sb.append( " " );
+      }
+      return sb.toString().trim();
+   }
+
+
+}

Propchange: ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/ChronicDiseaseLister.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/UmlsSourceNamesPrinter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/UmlsSourceNamesPrinter.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/UmlsSourceNamesPrinter.java (added)
+++ ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/UmlsSourceNamesPrinter.java Fri May  2 17:47:44 2014
@@ -0,0 +1,53 @@
+package org.apache.ctakes.dictionarytool;
+
+import org.apache.ctakes.dictionarytool.util.CreatorProperties;
+import org.apache.ctakes.dictionarytool.util.FileUtil;
+import org.apache.ctakes.dictionarytool.util.index.MrconsoIndex;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.UMLS_ROOT;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TERM_MAP;
+
+/**
+ * Prints all of the Source names in the User's UMLS Installation
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/28/14
+ */
+public class UmlsSourceNamesPrinter {
+
+//   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\Data\\UMLS\\2011AB\\META" };
+
+   public static void main( final String[] args ) {
+      // Set properties (user options) used to create the dictionary
+      final CreatorProperties properties = new CreatorProperties( args );
+      //      final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
+      final String consoPath = UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename;
+      final Collection<String> sources = new HashSet<String>();
+      try {
+         final BufferedReader reader = FileUtil.createReader( consoPath );
+         List<String> tokens = FileUtil.readBsvTokens( reader, consoPath );
+         while ( tokens != null ) {
+            final String source = tokens.get( MrconsoIndex.SOURCE._index );
+            sources.add( source );
+            tokens = FileUtil.readBsvTokens( reader, consoPath );
+         }
+         reader.close();
+      } catch ( IOException ioE ) {
+         System.err.println( ioE.getMessage() );
+      }
+      final List<String> sortedSources = new ArrayList<String>( sources );
+      Collections.sort( sortedSources );
+      for ( String source : sortedSources ) {
+         System.out.println( source );
+      }
+   }
+
+}

Propchange: ctakes/sandbox/dictionarytool/example/org/apache/ctakes/dictionarytool/UmlsSourceNamesPrinter.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java Fri May  2 17:47:44 2014
@@ -0,0 +1,122 @@
+package org.apache.ctakes.dictionarytool;
+
+import org.apache.ctakes.dictionarytool.reader.OrangebookReader;
+import org.apache.ctakes.dictionarytool.reader.SourceTypeListReader;
+import org.apache.ctakes.dictionarytool.reader.TuiListReader;
+import org.apache.ctakes.dictionarytool.reader.UmlsCodesForCuisReader;
+import org.apache.ctakes.dictionarytool.reader.UmlsCuisForTextsReader;
+import org.apache.ctakes.dictionarytool.util.CreatorProperties;
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.UmlsSourceTypeCuiValidator;
+import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
+import org.apache.ctakes.dictionarytool.writer.CuiCodesMapWriter;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.FORMAT_DATA;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.ORANGE_BOOK;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.SOURCE;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TERM_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TUI_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.UMLS_ROOT;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TERM_MAP;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/27/14
+ */
+public class CodeMapCreator {
+
+   static private final Logger LOGGER = Logger.getLogger( "CodeMapCreator" );
+
+
+//   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\Data\\UMLS\\2011AB\\META",
+//                                               "-ob",
+//                                               "C:\\Spiffy\\App\\umls\\2013AA\\2013AA\\Sean\\OrangeBook\\EOBZIP_2013_07_08-30_Fixed_PE\\Products.txt",
+//                                               "-ol", "C:/Spiffy/Output/DictionaryToolTest/SnomedCodeMap.bsv"
+//   };
+
+
+   public static void main( final String[] args ) {
+      // Set properties (user options) used to create the dictionary
+      final CreatorProperties properties = new CreatorProperties( args );
+//            final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
+      // Set up the term utility
+      final UmlsTermUtil umlsTermUtil = new UmlsTermUtil( FORMAT_DATA.getValue() );
+      // Write the non-medication terms
+      codeMapSemanticGroups();
+      // Write the medication terms
+      if ( ORANGE_BOOK.hasValue() ) {
+         codeMapOrangebook( umlsTermUtil );
+      }
+   }
+
+   static private void codeMapSemanticGroups() {
+      // Read wanted Sources
+      final Collection<String> wantedSources = SourceTypeListReader.readSourceTypes( SOURCE.getValue() );
+      // Read wanted Tuis
+      final Collection<String> wantedTuis = TuiListReader.readTuiList( TUI_LIST.getValue() );
+      if ( wantedTuis == null || wantedTuis.isEmpty() ) {
+         LOGGER.severe( "No valid TUI codes found in " + TUI_LIST.getValue() );
+         System.exit( 1 );
+      }
+      // get the valid Cuis for all wanted Tuis
+      final Map<String, Collection<String>> validCuisAndTuis
+            = CuiTuiUtil.getValidCuisAndTuis( UMLS_ROOT.getValue(), wantedSources, wantedTuis );
+      // Get the codes for all cuis
+      final Map<String, Map<String, Collection<String>>> cuisAndCodes
+            = UmlsCodesForCuisReader.readCodesForCuis( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
+                                                       validCuisAndTuis.keySet() );
+      // Output
+      if ( TERM_LIST.hasValue() ) {
+         CuiCodesMapWriter.writeCuiCodes( TERM_LIST.getValue(), cuisAndCodes );
+         //      } else if ( DATA_BASE.hasValue() && DATA_TABLE.hasValue() ) {
+         //         if ( isRareWordIndex ) {
+         //            RareWordDbWriter.writeTermsToDb( validCuisAndTuis, cuiTexts,
+         //                                             DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );
+         //         } else {
+         //            FirstWordDbWriter.writeTermsToDb( validCuisAndTuis, cuiTexts,
+         //                                              DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );
+         //         }
+      }
+      LOGGER.info( "Done Code-Mapping Non-Medication Cuis" );
+   }
+
+
+   static private void codeMapOrangebook( final UmlsTermUtil umlsTermUtil ) {
+      final Collection<String> orangeBookTexts = OrangebookReader.readOrangeBookTexts( ORANGE_BOOK.getValue() );
+      final Map<String, Collection<String>> cuiTexts
+            = UmlsCuisForTextsReader.readCuisForTexts( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
+                                                       orangeBookTexts, umlsTermUtil );
+      final Collection<String> validCuis
+            = UmlsSourceTypeCuiValidator.getSourceTypeValidCuis( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
+                                                                 Arrays.asList( "RXNORM" ),
+                                                                 cuiTexts.keySet() );
+      // Get the codes for all cuis
+      final Map<String, Map<String, Collection<String>>> cuisAndCodes
+            = UmlsCodesForCuisReader.readCodesForCuis( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
+                                                       validCuis );
+      // Output
+      if ( TERM_LIST.hasValue() ) {
+         CuiCodesMapWriter.writeCuiCodes( TERM_LIST.getValue(), cuisAndCodes );
+         //      } else if ( DATA_BASE.hasValue() && DATA_TABLE.hasValue() ) {
+         //         final Map<String,Collection<String>> cuiTuis
+         //               = UmlsTuisForCuisReader.readUmlsTuisForCuis( UMLS_ROOT.getValue() + '/' + CUI_TUI_MAP._filename,
+         //                                                            validCuis );
+         //         if ( isRareWordIndex ) {
+         //            RareWordDbWriter.writeTermsToDb( cuiTuis, validCuiTexts,
+         //                                             DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );
+         //         } else {
+         //            FirstWordDbWriter.writeTermsToDb( cuiTuis, validCuiTexts,
+         //                                              DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );
+         //         }
+      }
+      LOGGER.info( "Done Code-Mapping Medication Cuis" );
+   }
+
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/CodeMapCreator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/DictionaryCreator.java Fri May  2 17:47:44 2014
@@ -4,10 +4,10 @@ import org.apache.ctakes.dictionarytool.
 import org.apache.ctakes.dictionarytool.reader.SourceTypeListReader;
 import org.apache.ctakes.dictionarytool.reader.TuiListReader;
 import org.apache.ctakes.dictionarytool.reader.UmlsCuisForTextsReader;
-import org.apache.ctakes.dictionarytool.reader.UmlsCuisForTuisReader;
 import org.apache.ctakes.dictionarytool.reader.UmlsTextsForCuisReader;
 import org.apache.ctakes.dictionarytool.reader.UmlsTuisForCuisReader;
 import org.apache.ctakes.dictionarytool.util.CreatorProperties;
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.UmlsSourceTypeCuiValidator;
 import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
 import org.apache.ctakes.dictionarytool.writer.CuiTextsMapWriter;
@@ -20,9 +20,16 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.logging.Logger;
 
-
-import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.*;
-
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.DATA_BASE;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.DATA_TABLE;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.FORMAT_DATA;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.ORANGE_BOOK;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.SOURCE;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TERM_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TUI_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.UMLS_ROOT;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TERM_MAP;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TUI_MAP;
 
 /**
  * Author: SPF
@@ -33,21 +40,19 @@ public class DictionaryCreator {
 
    static private final Logger LOGGER = Logger.getLogger( "DictionaryCreator" );
 
-   static private final String UMLS_CUI_TUI_MAP = "MRSTY.RRF";
-   static private final String UMLS_TERM_LIST = "MRCONSO.RRF";
-
-//   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\Data\\UMLS\\2011AB\\META",
-//                               "-ob", "C:\\Spiffy\\App\\umls\\2013AA\\2013AA\\Sean\\OrangeBook\\EOBZIP_2013_07_08-30_Fixed_PE\\Products.txt",
-//                               "-ol", "C:/Spiffy/Output/DictionaryToolTest/defaults.bsv" };
-//                               "-db", "jdbc:hsqldb:file:C:/Spiffy/Projects/RareWordDict/Sno2011ab_ctakes_Mem/cTakesUmls",
-//                               "-tbl", "CTAKES_UMLS" };
 
+   //   static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\Data\\UMLS\\2011AB\\META",
+   //                               "-ob", "C:\\Spiffy\\App\\umls\\2013AA\\2013AA\\Sean\\OrangeBook\\EOBZIP_2013_07_08-30_Fixed_PE\\Products.txt",
+   //                               "-ol", "C:/Spiffy/Output/DictionaryToolTest/defaults.bsv"
+   //                               "-db", "jdbc:hsqldb:file:C:/Spiffy/Projects/RareWordDict/Sno2011ab_ctakes_Mem/cTakesUmls",
+   //                               "-tbl", "CTAKES_UMLS"
+   //                            };
 
 
    public static void main( final String[] args ) {
       // Set properties (user options) used to create the dictionary
       final CreatorProperties properties = new CreatorProperties( args );
-//      final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
+      //      final CreatorProperties properties = new CreatorProperties( DEBUG_ARGS );
       // Set up the term utility
       final UmlsTermUtil umlsTermUtil = new UmlsTermUtil( FORMAT_DATA.getValue() );
       // Write the non-medication terms
@@ -59,46 +64,21 @@ public class DictionaryCreator {
    }
 
 
-
-   static private Map<String,Collection<String>> getValidCuisAndTuis( final String umlsPath,
-                                                                      final Collection<String> sources,
-                                                                      final Collection<String> tuis ) {
-      // get all the Cuis for the wanted Tuis.  Key = Cui, Value = Tuis to which the Cui belongs
-      final Map<String, Collection<String>> wantedCuisAndTuis
-            = UmlsCuisForTuisReader.readUmlsCuisForTuis( umlsPath + '/' + UMLS_CUI_TUI_MAP, tuis );
-      if ( sources.isEmpty() ) {
-         // No specified source types, assume that all sources are valid
-         return wantedCuisAndTuis;
-      }
-      // filter out the Cuis that do not belong to the given sources
-      final Collection<String> validCuis
-            = UmlsSourceTypeCuiValidator.getSourceTypeValidCuis( umlsPath + '/' + UMLS_TERM_LIST,
-                                                                 sources,
-                                                                 wantedCuisAndTuis.keySet() );
-      // Key = Cui, Value = Tuis to which the Cui belongs
-      final Map<String,Collection<String>> validCuisAndTuis = new HashMap<String, Collection<String>>();
-      for ( String validCui : validCuis ) {
-         validCuisAndTuis.put( validCui, wantedCuisAndTuis.get( validCui ) );
-      }
-      return validCuisAndTuis;
-   }
-
-
    static private void writeSemanticGroups( final UmlsTermUtil umlsTermUtil, final boolean isRareWordIndex ) {
       // Read wanted Sources
-      final Collection<String> sources = SourceTypeListReader.readSourceTypes( SOURCE.getValue() );
+      final Collection<String> wantedSources = SourceTypeListReader.readSourceTypes( SOURCE.getValue() );
       // Read wanted Tuis
-      final Collection<String> tuis = TuiListReader.readTuiList( TUI_LIST.getValue() );
-      if ( tuis == null || tuis.isEmpty() ) {
+      final Collection<String> wantedTuis = TuiListReader.readTuiList( TUI_LIST.getValue() );
+      if ( wantedTuis == null || wantedTuis.isEmpty() ) {
          LOGGER.severe( "No valid TUI codes found in " + TUI_LIST.getValue() );
          System.exit( 1 );
       }
       // get the valid Cuis for all wanted Tuis
-      final Map<String,Collection<String>> validCuisAndTuis
-            = getValidCuisAndTuis( UMLS_ROOT.getValue(), sources, tuis );
+      final Map<String, Collection<String>> validCuisAndTuis
+            = CuiTuiUtil.getValidCuisAndTuis( UMLS_ROOT.getValue(), wantedSources, wantedTuis );
       // Get the texts for all cuis
       final Map<String, Collection<String>> cuiTexts
-            = UmlsTextsForCuisReader.readTextsForCuis( UMLS_ROOT.getValue() + '/' + UMLS_TERM_LIST,
+            = UmlsTextsForCuisReader.readTextsForCuis( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
                                                        validCuisAndTuis.keySet(), umlsTermUtil );
       if ( TERM_LIST.hasValue() ) {
          CuiTextsMapWriter.writeCuiTexts( TERM_LIST.getValue(), cuiTexts );
@@ -115,18 +95,16 @@ public class DictionaryCreator {
    }
 
 
-
    static private void writeOrangebook( final UmlsTermUtil umlsTermUtil, final boolean isRareWordIndex ) {
-
       final Collection<String> orangeBookTexts = OrangebookReader.readOrangeBookTexts( ORANGE_BOOK.getValue() );
       final Map<String, Collection<String>> cuiTexts
-            = UmlsCuisForTextsReader.readCuisForTexts( UMLS_ROOT.getValue() + '/' + UMLS_TERM_LIST,
+            = UmlsCuisForTextsReader.readCuisForTexts( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
                                                        orangeBookTexts, umlsTermUtil );
       final Collection<String> validCuis
-            = UmlsSourceTypeCuiValidator.getSourceTypeValidCuis( UMLS_ROOT.getValue() + '/' + UMLS_TERM_LIST,
+            = UmlsSourceTypeCuiValidator.getSourceTypeValidCuis( UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename,
                                                                  Arrays.asList( "RXNORM" ),
                                                                  cuiTexts.keySet() );
-      final Map<String,Collection<String>> validCuiTexts = new HashMap<String, Collection<String>>( cuiTexts.size() );
+      final Map<String, Collection<String>> validCuiTexts = new HashMap<String, Collection<String>>( cuiTexts.size() );
       for ( String cui : validCuis ) {
          final Collection<String> texts = cuiTexts.get( cui );
          if ( cui.equals( "C0028128" ) ) {
@@ -138,8 +116,9 @@ public class DictionaryCreator {
       if ( TERM_LIST.hasValue() ) {
          CuiTextsMapWriter.writeCuiTexts( TERM_LIST.getValue(), validCuiTexts );
       } else if ( DATA_BASE.hasValue() && DATA_TABLE.hasValue() ) {
-         final Map<String,Collection<String>> cuiTuis
-               = UmlsTuisForCuisReader.readUmlsTuisForCuis( UMLS_ROOT.getValue() + '/' + UMLS_CUI_TUI_MAP, validCuis );
+         final Map<String, Collection<String>> cuiTuis
+               = UmlsTuisForCuisReader.readUmlsTuisForCuis( UMLS_ROOT.getValue() + '/' + CUI_TUI_MAP._filename,
+                                                            validCuis );
          if ( isRareWordIndex ) {
             RareWordDbWriter.writeTermsToDb( cuiTuis, validCuiTexts,
                                              DATA_BASE.getValue(), "sa", "", DATA_TABLE.getValue() );

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/RelationsCreator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/RelationsCreator.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/RelationsCreator.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/RelationsCreator.java Fri May  2 17:47:44 2014
@@ -0,0 +1,99 @@
+package org.apache.ctakes.dictionarytool;
+
+import org.apache.ctakes.dictionarytool.reader.SourceTypeListReader;
+import org.apache.ctakes.dictionarytool.reader.TuiListReader;
+import org.apache.ctakes.dictionarytool.reader.UmlsRelationsForCuisReader;
+import org.apache.ctakes.dictionarytool.reader.UmlsTextsForCuisReader;
+import org.apache.ctakes.dictionarytool.util.CreatorProperties;
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
+import org.apache.ctakes.dictionarytool.writer.CuiRelationsMapWriter;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Logger;
+
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.FORMAT_DATA;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.SOURCE;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TERM_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.TUI_LIST;
+import static org.apache.ctakes.dictionarytool.util.CreatorProperties.Option.UMLS_ROOT;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TERM_MAP;
+import static org.apache.ctakes.dictionarytool.util.UmlsFileName.RELATION_LIST;
+
+/**
+ * Command-line entry point that collects relations between Cuis from the UMLS files
+ * and writes them, together with the Cui texts, to the term list given by the user options.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/26/14
+ */
+public class RelationsCreator {
+
+   static private final Logger LOGGER = Logger.getLogger( "RelationsCreator" );
+
+
+   //      static private final String[] DEBUG_ARGS = {"-umls", "C:\\Spiffy\\Data\\UMLS\\2011AB\\META",
+   //                                  "-ol", "C:/Spiffy/Output/DictionaryToolTest/SnomedRelations.bsv",
+   //                                  "-ol", "C:/Spiffy/Output/DictionaryToolTest/DisoRelations.bsv",
+   //                                  "-tui", "data/default/CtakesDisoTuis.txt",
+   //      };
+
+
+   /**
+    * Parses the user options, builds the term utility and writes the relations.
+    *
+    * @param args command-line options, handed unchanged to {@link CreatorProperties}
+    */
+   public static void main( final String[] args ) {
+      // The created instance is never read afterwards; presumably constructing it is what makes the
+      // static Option values available below - confirm in CreatorProperties.
+      new CreatorProperties( args );
+      //      new CreatorProperties( DEBUG_ARGS );
+      // The term utility is configured from the FORMAT_DATA option
+      writeRelations( new UmlsTermUtil( FORMAT_DATA.getValue() ) );
+   }
+
+
+   /**
+    * Creates one empty relation map; Key = Cui, Value = related items for that Cui.
+    *
+    * @param expectedSize initial capacity handed to the map
+    * @return new, empty, mutable map
+    */
+   static private Map<String, Collection<String>> newRelationMap( final int expectedSize ) {
+      return new HashMap<String, Collection<String>>( expectedSize );
+   }
+
+
+   /**
+    * Reads the wanted sources and Tuis, resolves the valid Cuis, reads the Cui texts and the six kinds of
+    * relation for those Cuis, and writes everything to the term list when one was specified.
+    * Exits the JVM with status 1 when no Tuis could be read.
+    * <p>
+    * According to http://aclweb.org/anthology/N/N13/N13-3007.pdf the only difference between parent/child and
+    * broader/narrower is that parent/child came from the original source, whereas broader/narrower was added by
+    * the UMLS editors.  Still, it may be prudent to separate the two.
+    *
+    * @param umlsTermUtil term utility passed on to the text reader
+    */
+   static private void writeRelations( final UmlsTermUtil umlsTermUtil ) {
+      final Collection<String> sourceTypes = SourceTypeListReader.readSourceTypes( SOURCE.getValue() );
+      final Collection<String> tuis = TuiListReader.readTuiList( TUI_LIST.getValue() );
+      final boolean haveTuis = tuis != null && !tuis.isEmpty();
+      if ( !haveTuis ) {
+         LOGGER.severe( "No valid TUI codes found in " + TUI_LIST.getValue() );
+         System.exit( 1 );
+      }
+      // Key = Cui, Value = Tuis; only the Cuis (keys) are used from here on
+      final Map<String, Collection<String>> cuisToTuis
+            = CuiTuiUtil.getValidCuisAndTuis( UMLS_ROOT.getValue(), sourceTypes, tuis );
+      // Texts for all valid cuis (the original note calls these the preferred texts)
+      final String termMapPath = UMLS_ROOT.getValue() + '/' + CUI_TERM_MAP._filename;
+      final Map<String, Collection<String>> texts
+            = UmlsTextsForCuisReader.readTextsForCuis( termMapPath, cuisToTuis.keySet(),
+                                                       umlsTermUtil, true, false );
+      // One output map per relation kind, each sized after the number of cuis that have text
+      final int capacity = texts.size();
+      final Map<String, Collection<String>> synonyms = newRelationMap( capacity );
+      final Map<String, Collection<String>> parents = newRelationMap( capacity );
+      final Map<String, Collection<String>> children = newRelationMap( capacity );
+      final Map<String, Collection<String>> broadeners = newRelationMap( capacity );
+      final Map<String, Collection<String>> narrowers = newRelationMap( capacity );
+      final Map<String, Collection<String>> similars = newRelationMap( capacity );
+      final String relationsPath = UMLS_ROOT.getValue() + '/' + RELATION_LIST._filename;
+      UmlsRelationsForCuisReader.readRelationsForCuis( relationsPath, cuisToTuis.keySet(),
+                                                       synonyms, parents, children,
+                                                       broadeners, narrowers, similars );
+      // Only term-list output is produced here; there is no database output path in this class
+      if ( TERM_LIST.hasValue() ) {
+         CuiRelationsMapWriter.writeCuiRelations( TERM_LIST.getValue(), texts,
+                                                  synonyms, parents, children,
+                                                  broadeners, narrowers, similars );
+      }
+      LOGGER.info( "Done Writing Non-Medication Relations" );
+   }
+
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/RelationsCreator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiRelationsMapReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiRelationsMapReader.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiRelationsMapReader.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiRelationsMapReader.java Fri May  2 17:47:44 2014
@@ -0,0 +1,93 @@
+package org.apache.ctakes.dictionarytool.reader;
+
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.FileUtil;
+import org.apache.ctakes.dictionarytool.util.TokenUtil;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Reads a relations file, tokenized line by line with {@link FileUtil#readBsvTokens}, into a map of
+ * Cui to {@link CuiRelations}.  The expected column order is given by {@link CuiRelationsIndex}.
+ * <p>
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/27/14
+ */
+final public class CuiRelationsMapReader {
+
+   /**
+    * Position of each column within one tokenized line of the relations file.
+    */
+   static private enum CuiRelationsIndex {
+      CUI( 0 ), TEXT( 1 ), SYNONYMS( 2 ), PARENTS( 3 ), CHILDREN( 4 ), BROADER( 5 ), NARROWER( 6 ), SIMILAR( 7 );
+      final private int _index;
+
+      private CuiRelationsIndex( final int index ) {
+         _index = index;
+      }
+   }
+
+   /**
+    * Holder for everything read from one line of the relations file: the Cui, its text and
+    * one collection per relation column.
+    */
+   final static public class CuiRelations {
+      final public String __cui;
+      final public String __text;
+      final public Collection<String> __synonyms;
+      final public Collection<String> __parents;
+      final public Collection<String> __children;
+      final public Collection<String> __broadeners;
+      final public Collection<String> __narrowers;
+      final public Collection<String> __similars;
+
+      private CuiRelations( final String cui, final String text, final Collection<String> synonyms,
+                            final Collection<String> parents, final Collection<String> children,
+                            final Collection<String> broadeners, final Collection<String> narrowers,
+                            final Collection<String> similars ) {
+         __cui = cui;
+         __text = text;
+         __synonyms = synonyms;
+         __parents = parents;
+         __children = children;
+         __broadeners = broadeners;
+         __narrowers = narrowers;
+         __similars = similars;
+      }
+   }
+
+   // static utility, not to be instantiated
+   private CuiRelationsMapReader() {
+   }
+
+   /**
+    * Reads the given relations file into a map.  Lines that do not reach the synonyms column are ignored.
+    * Relation columns missing from the end of a line are read as empty collections.
+    * An {@link IOException} is reported on standard error and whatever was read so far is returned.
+    *
+    * @param relationsPath path to the relations file
+    * @return map with Key = Cui, Value = relations for that Cui; a later line replaces an earlier one with the same Cui
+    */
+   static public Map<String, CuiRelations> readCuiRelationsMap( final String relationsPath ) {
+      System.out.println( "Compiling map of Cuis and Relations using " + relationsPath );
+      final Map<String, CuiRelations> cuiRelationsMap = new HashMap<String, CuiRelations>();
+      long lineCount = 0;
+      try {
+         final BufferedReader reader = FileUtil.createReader( relationsPath );
+         try {
+            List<String> tokens = FileUtil.readBsvTokens( reader, relationsPath );
+            while ( tokens != null ) {
+               lineCount++;
+               if ( tokens.size() > CuiRelationsIndex.SYNONYMS._index ) {
+                  final String cui = CuiTuiUtil.getAsCui( tokens.get( CuiRelationsIndex.CUI._index ) );
+                  final String text = tokens.get( CuiRelationsIndex.TEXT._index );
+                  // The guard above only guarantees the columns up to SYNONYMS,
+                  // so every relation column is fetched through the bounds-checked helper
+                  final CuiRelations cuiRelations
+                        = new CuiRelations( cui, text,
+                                            getRelationItems( tokens, CuiRelationsIndex.SYNONYMS ),
+                                            getRelationItems( tokens, CuiRelationsIndex.PARENTS ),
+                                            getRelationItems( tokens, CuiRelationsIndex.CHILDREN ),
+                                            getRelationItems( tokens, CuiRelationsIndex.BROADER ),
+                                            getRelationItems( tokens, CuiRelationsIndex.NARROWER ),
+                                            getRelationItems( tokens, CuiRelationsIndex.SIMILAR ) );
+                  cuiRelationsMap.put( cui, cuiRelations );
+               }
+               if ( lineCount % 100000 == 0 ) {
+                  System.out.println( "File Line " + lineCount + "\t Cuis " + cuiRelationsMap.size() );
+               }
+               tokens = FileUtil.readBsvTokens( reader, relationsPath );
+            }
+         } finally {
+            // close the file even when reading fails part way through
+            reader.close();
+         }
+      } catch ( IOException ioE ) {
+         System.err.println( ioE.getMessage() );
+      }
+      System.out.println( "File Line " + lineCount + "\t Cuis " + cuiRelationsMap.size() );
+      return cuiRelationsMap;
+   }
+
+   /**
+    * Fetches the items of one relation column without running past the end of a short line.
+    *
+    * @param tokens tokens of one line
+    * @param column wanted relation column
+    * @return items parsed by {@link TokenUtil#getCsvItems} when the column exists, otherwise a new empty collection
+    */
+   static private Collection<String> getRelationItems( final List<String> tokens, final CuiRelationsIndex column ) {
+      if ( column._index < tokens.size() ) {
+         return TokenUtil.getCsvItems( tokens.get( column._index ) );
+      }
+      // fully qualified so that no additional import is required
+      return new java.util.ArrayList<String>( 0 );
+   }
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiRelationsMapReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiTextsMapReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiTextsMapReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiTextsMapReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/CuiTextsMapReader.java Fri May  2 17:47:44 2014
@@ -21,9 +21,14 @@ final public class CuiTextsMapReader {
    private CuiTextsMapReader() {
    }
 
-   static private final int CUI_INDEX = 0;
-   static private final int TEXT_INDEX = 1;
+   static private enum CuiTextIndex { // positions of the tokens read from one line of the cui-texts file
+      CUI( 0 ), TEXT( 1 ); // token 0 is converted with CuiTuiUtil.getAsCui, token 1 is used as the text
+      final private int _index; // position passed to tokens.get( .. )
 
+      private CuiTextIndex( final int index ) {
+         _index = index;
+      }
+   }
 
    private Map<String, Collection<String>> readCuiTexts( final String termsPath ) {
       System.out.println( "Compiling map of Cuis and Texts using " + termsPath );
@@ -34,9 +39,9 @@ final public class CuiTextsMapReader {
          List<String> tokens = FileUtil.readBsvTokens( reader, termsPath );
          while ( tokens != null ) {
             lineCount++;
-            if ( tokens.size() > TEXT_INDEX ) {
-               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI_INDEX ) );
-               final String text = tokens.get( TEXT_INDEX );
+            if ( tokens.size() > CuiTextIndex.TEXT._index ) {
+               final String cui = CuiTuiUtil.getAsCui( tokens.get( CuiTextIndex.CUI._index ) );
+               final String text = tokens.get( CuiTextIndex.TEXT._index );
                Collection<String> textsForCui = cuiTexts.get( cui );
                if ( textsForCui == null ) {
                   textsForCui = new HashSet<String>( 1 );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/OrangebookReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/OrangebookReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/OrangebookReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/OrangebookReader.java Fri May  2 17:47:44 2014
@@ -16,7 +16,8 @@ import java.util.List;
  */
 final public class OrangebookReader {
 
-   private OrangebookReader() {}
+   private OrangebookReader() {
+   }
 
    static private final int INGREDIENT_INDEX = 0;
    static private final int TRADENAME_INDEX = 2;
@@ -49,7 +50,7 @@ final public class OrangebookReader {
          reader.close();
          System.out.println( "File Lines " + lineCount + "\t Ingredients " + ingredients.size()
                                    + "\t TradeNames " + tradeNames.size() );
-      } catch (IOException ioE ) {
+      } catch ( IOException ioE ) {
          System.err.println( "Bad Line: " + lineCount );
       }
       ingredients.addAll( tradeNames );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SemanticTypeListReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SemanticTypeListReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SemanticTypeListReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SemanticTypeListReader.java Fri May  2 17:47:44 2014
@@ -11,7 +11,8 @@ import java.util.Collection;
  */
 final public class SemanticTypeListReader {
 
-   private SemanticTypeListReader() {}
+   private SemanticTypeListReader() {
+   }
 
    static public Collection<String> readSemanticTypes( final String semanticTypesPath ) {
       return FileUtil.readOneColumn( semanticTypesPath, "list of Semantic Types" );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SourceTypeListReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SourceTypeListReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SourceTypeListReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/SourceTypeListReader.java Fri May  2 17:47:44 2014
@@ -11,7 +11,8 @@ import java.util.Collection;
  */
 final public class SourceTypeListReader {
 
-   private SourceTypeListReader() {}
+   private SourceTypeListReader() {
+   }
 
    static public Collection<String> readSourceTypes( final String sourceTypesPath ) {
       return FileUtil.readOneColumn( sourceTypesPath, "list of Source Types" );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/TuiListReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/TuiListReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/TuiListReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/TuiListReader.java Fri May  2 17:47:44 2014
@@ -13,7 +13,8 @@ import java.util.HashSet;
  */
 final public class TuiListReader {
 
-   private TuiListReader() {}
+   private TuiListReader() {
+   }
 
 
    static public Collection<String> readTuiList( final String tuiListPath ) {

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java Fri May  2 17:47:44 2014
@@ -0,0 +1,80 @@
+package org.apache.ctakes.dictionarytool.reader;
+
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.FileUtil;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.CUI;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.LANGUAGE;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.SOURCE;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.SOURCE_CODE;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.TEXT;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/28/14
+ */
+final public class UmlsCodesForCuisReader {
+
+   private UmlsCodesForCuisReader() {
+   }
+
+   static public Map<String, Map<String, Collection<String>>> readCodesForCuis( final String rrfPath,
+                                                                                final Collection<String> wantedCuis ) {
+      final Collection<String> codeSources
+            = new HashSet<String>( Arrays.asList( "ICD10PCS", "ICD9CM", "RXNORM", "SNOMEDCT" ) );
+      long lineCount = 0;
+      long codeCount = 0;
+      final Map<String, Map<String, Collection<String>>> cuisAndCodes
+            = new HashMap<String, Map<String, Collection<String>>>( wantedCuis.size() );
+      try {
+         final BufferedReader reader = FileUtil.createReader( rrfPath );
+         List<String> tokens = FileUtil.readBsvTokens( reader, rrfPath );
+         while ( tokens != null ) {
+            lineCount++;
+            if ( tokens.size() > TEXT._index
+                  && tokens.get( LANGUAGE._index ).equals( "ENG" )
+                  && codeSources.contains( tokens.get( SOURCE._index ) ) ) {
+               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI._index ) );
+               if ( wantedCuis.contains( cui ) ) {
+                  Map<String, Collection<String>> codeMap = cuisAndCodes.get( cui );
+                  if ( codeMap == null ) {
+                     codeMap = new HashMap<String, Collection<String>>( 1 );
+                     cuisAndCodes.put( cui, codeMap );
+                  }
+                  Collection<String> codes = codeMap.get( tokens.get( SOURCE._index ) );
+                  if ( codes == null ) {
+                     codes = new HashSet<String>( 1 );
+                     codeMap.put( tokens.get( SOURCE._index ), codes );
+                  }
+                  if ( codes.add( tokens.get( SOURCE_CODE._index ) ) ) {
+                     codeCount++;
+                  }
+               }
+            }
+            if ( lineCount % 2000 == 0 ) {
+               System.out.print( "." );
+               if ( lineCount % 100000 == 0 ) {
+                  System.out.println( "File Line " + lineCount + "\t Codes " + codeCount );
+               }
+            }
+            tokens = FileUtil.readBsvTokens( reader, rrfPath );
+         }
+         reader.close();
+      } catch ( IOException ioE ) {
+         System.err.println( ioE.getMessage() );
+      }
+      System.out.println( "File Line " + lineCount + "\t Codes " + codeCount );
+      return cuisAndCodes;
+   }
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCodesForCuisReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTextsReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTextsReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTextsReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTextsReader.java Fri May  2 17:47:44 2014
@@ -2,7 +2,6 @@ package org.apache.ctakes.dictionarytool
 
 import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.FileUtil;
-import org.apache.ctakes.dictionarytool.util.RRF_INDEX;
 import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
 
 import java.io.BufferedReader;
@@ -12,6 +11,10 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.CUI;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.LANGUAGE;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.TEXT;
+
 /**
  * Author: SPF
  * Affiliation: CHIP-NLP
@@ -35,8 +38,8 @@ final public class UmlsCuisForTextsReade
          List<String> tokens = FileUtil.readBsvTokens( reader, rrfPath );
          while ( tokens != null ) {
             lineCount++;
-            if ( tokens.size() > RRF_INDEX.TEXT._index && tokens.get( RRF_INDEX.LANGUAGE._index ).equals( "ENG" ) ) {
-               final String text = tokens.get( RRF_INDEX.TEXT._index );
+            if ( tokens.size() > TEXT._index && tokens.get( LANGUAGE._index ).equals( "ENG" ) ) {
+               final String text = tokens.get( TEXT._index );
                final Collection<String> formattedTexts = umlsTermUtil.getFormattedTexts( text );
                if ( formattedTexts == null || formattedTexts.isEmpty() ) {
                   tokens = FileUtil.readBsvTokens( reader, rrfPath );
@@ -53,13 +56,14 @@ final public class UmlsCuisForTextsReade
                   if ( lineCount % 2000 == 0 ) {
                      System.out.print( "." );
                      if ( lineCount % 100000 == 0 ) {
-                        System.out.println( "File Line " + lineCount + "\t Cuis " + cuisAndText.size() + "\t Terms " + textCount );
+                        System.out.println(
+                              "File Line " + lineCount + "\t Cuis " + cuisAndText.size() + "\t Terms " + textCount );
                      }
                   }
                   tokens = FileUtil.readBsvTokens( reader, rrfPath );
                   continue;
                }
-               final String cui = CuiTuiUtil.getAsCui( tokens.get( RRF_INDEX.CUI._index ) );
+               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI._index ) );
                Collection<String> textsForCui = cuisAndText.get( cui );
                if ( textsForCui == null ) {
                   cuisAndText.put( cui, formattedTexts );
@@ -73,7 +77,8 @@ final public class UmlsCuisForTextsReade
             if ( lineCount % 2000 == 0 ) {
                System.out.print( "." );
                if ( lineCount % 100000 == 0 ) {
-                  System.out.println( "File Line " + lineCount + "\t Cuis " + cuisAndText.size() + "\t Terms " + textCount );
+                  System.out.println(
+                        "File Line " + lineCount + "\t Cuis " + cuisAndText.size() + "\t Terms " + textCount );
                }
             }
             tokens = FileUtil.readBsvTokens( reader, rrfPath );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTuisReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTuisReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsCuisForTuisReader.java Fri May  2 17:47:44 2014
@@ -11,6 +11,9 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
+import static org.apache.ctakes.dictionarytool.util.index.MrstyIndex.CUI;
+import static org.apache.ctakes.dictionarytool.util.index.MrstyIndex.TUI;
+
 /**
  * Author: SPF
  * Affiliation: CHIP-NLP
@@ -21,11 +24,6 @@ final public class UmlsCuisForTuisReader
    private UmlsCuisForTuisReader() {
    }
 
-//   static private final String CUI_TUI_PATH = "C:/Spiffy/App/umls/2013AA/2013AA/META/MRSTY.RRF";
-
-   static private final int CUI_INDEX = 0;
-   static private final int TUI_INDEX = 1;
-
    static public Map<String, Collection<String>> readUmlsCuisForTuis( final String cuiTuiMapPath,
                                                                       final Collection<String> wantedTuis ) {
       System.out.println( "Compiling list of Cuis with wanted Tuis using " + cuiTuiMapPath );
@@ -37,13 +35,13 @@ final public class UmlsCuisForTuisReader
          List<String> tokens = FileUtil.readBsvTokens( reader, cuiTuiMapPath );
          while ( tokens != null ) {
             lineCount++;
-            if ( tokens.size() > TUI_INDEX ) {
-               final String tui = CuiTuiUtil.getAsTui( tokens.get( TUI_INDEX ) );
+            if ( tokens.size() > TUI._index ) {
+               final String tui = CuiTuiUtil.getAsTui( tokens.get( TUI._index ) );
                if ( !wantedTuis.contains( tui ) ) {
                   tokens = FileUtil.readBsvTokens( reader, cuiTuiMapPath );
                   continue;
                }
-               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI_INDEX ) );
+               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI._index ) );
                Collection<String> tuis = wantedCuisAndTuis.get( cui );
                if ( tuis == null ) {
                   tuis = new HashSet<String>( 1 );

Added: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsRelationsForCuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsRelationsForCuisReader.java?rev=1591985&view=auto
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsRelationsForCuisReader.java (added)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsRelationsForCuisReader.java Fri May  2 17:47:44 2014
@@ -0,0 +1,169 @@
+package org.apache.ctakes.dictionarytool.reader;
+
+import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
+import org.apache.ctakes.dictionarytool.util.FileUtil;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.ctakes.dictionarytool.util.index.MrrelIndex.CUI_1;
+import static org.apache.ctakes.dictionarytool.util.index.MrrelIndex.CUI_2;
+import static org.apache.ctakes.dictionarytool.util.index.MrrelIndex.REFINED;
+import static org.apache.ctakes.dictionarytool.util.index.MrrelIndex.RELATION;
+
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 3/26/14
+ */
+final public class UmlsRelationsForCuisReader {
+
+   private UmlsRelationsForCuisReader() {
+   }
+
+   private enum RelationCode {
+      PARENT( "PAR" ), BROADER( "RB" ),
+      CHILD( "CHD" ), NARROWER( "RN" ),
+      SYNONYM( "SY" ), POSSIBLE_SYNONYM( "RQ" ), SIMILAR( "RL" );
+      final private String _code;
+
+      private RelationCode( final String code ) {
+         _code = code;
+      }
+
+      static private boolean isParent( final String code ) {
+         return code.equals( PARENT._code );
+      }
+
+      static private boolean isBroader( final String code ) {
+         return code.equals( BROADER._code );
+      }
+
+      static private boolean isChild( final String code ) {
+         return code.equals( CHILD._code );
+      }
+
+      static private boolean isNarrower( final String code ) {
+         return code.equals( NARROWER._code );
+      }
+
+      static private boolean isSynonym( final String code ) {
+         return code.equals( SYNONYM._code );
+      }
+
+      static private boolean isSimilar( final String code ) {
+         return code.equals( POSSIBLE_SYNONYM._code ) || code.equals( SIMILAR._code );
+      }
+
+   }
+
+   static public void readRelationsForCuis( final String rrfPath,
+                                            final Collection<String> wantedCuis,
+                                            final Map<String, Collection<String>> cuiSynonyms,
+                                            final Map<String, Collection<String>> cuiParents,
+                                            final Map<String, Collection<String>> cuiChildren,
+                                            final Map<String, Collection<String>> cuiBroaders,
+                                            final Map<String, Collection<String>> cuiNarrowers,
+                                            final Map<String, Collection<String>> cuiSimilars ) {
+      System.out.println( "Compiling map of Umls Cuis and Relations" );
+      long lineCount = 0;
+      long synonymCount = 0;
+      long progenyCount = 0;
+      long broaderCount = 0;
+      long similarsCount = 0;
+      try {
+         final BufferedReader reader = FileUtil.createReader( rrfPath );
+         List<String> tokens = FileUtil.readBsvTokens( reader, rrfPath );
+         while ( tokens != null ) {
+            lineCount++;
+            if ( tokens.size() > REFINED._index ) {
+               final String cui1 = CuiTuiUtil.getAsCui( tokens.get( CUI_1._index ) );
+               final String cui2 = CuiTuiUtil.getAsCui( tokens.get( CUI_2._index ) );
+               if ( wantedCuis.contains( cui1 ) && wantedCuis.contains( cui2 ) && !cui1.equals( cui2 ) ) {
+                  final String code = tokens.get( RELATION._index );
+                  if ( RelationCode.isSynonym( code ) ) {
+                     if ( addSynonym( cui1, cui2, cuiSynonyms ) ) {
+                        synonymCount++;
+                     }
+                     if ( addSynonym( cui2, cui1, cuiSynonyms ) ) {
+                        synonymCount++;
+                     }
+                  } else if ( RelationCode.isParent( code ) ) {
+                     if ( addParentChild( cui2, cui1, cuiParents, cuiChildren ) ) {
+                        progenyCount++;
+                     }
+                  } else if ( RelationCode.isChild( code ) ) {
+                     if ( addParentChild( cui1, cui2, cuiParents, cuiChildren ) ) {
+                        progenyCount++;
+                     }
+                  } else if ( RelationCode.isBroader( code ) ) {
+                     if ( addParentChild( cui2, cui1, cuiBroaders, cuiNarrowers ) ) {
+                        broaderCount++;
+                     }
+                  } else if ( RelationCode.isNarrower( code ) ) {
+                     if ( addParentChild( cui1, cui2, cuiBroaders, cuiNarrowers ) ) {
+                        broaderCount++;
+                     }
+                  } else if ( RelationCode.isSimilar( code ) ) {
+                     if ( addSynonym( cui1, cui2, cuiSimilars ) ) {
+                        similarsCount++;
+                     }
+                     if ( addSynonym( cui2, cui1, cuiSimilars ) ) {
+                        similarsCount++;
+                     }
+                  }
+               }
+            }
+            if ( lineCount % 2000 == 0 ) {
+               System.out.print( "." );
+               if ( lineCount % 100000 == 0 ) {
+                  System.out.println( "File Line " + lineCount
+                                            + "\t Synonyms " + synonymCount + "\t Progeny " + progenyCount
+                                            + "\t Broadeners " + broaderCount + "\t Similars " + similarsCount );
+               }
+            }
+            tokens = FileUtil.readBsvTokens( reader, rrfPath );
+         }
+         reader.close();
+      } catch ( IOException ioE ) {
+         System.err.println( ioE.getMessage() );
+      }
+      System.out.println( "File Line " + lineCount
+                                + "\t Synonyms " + synonymCount + "\t Progeny " + progenyCount
+                                + "\t Broadeners " + broaderCount + "\t Similars " + similarsCount );
+   }
+
+   static private boolean addParentChild( final String parentCui, final String childCui,
+                                          final Map<String, Collection<String>> cuiParents,
+                                          final Map<String, Collection<String>> cuiChildren ) {
+      Collection<String> children = cuiChildren.get( parentCui );
+      if ( children == null ) {
+         children = new HashSet<String>( 1 );
+         cuiChildren.put( parentCui, children );
+      }
+      final boolean addedChild = children.add( childCui );
+      Collection<String> parents = cuiParents.get( childCui );
+      if ( parents == null ) {
+         parents = new HashSet<String>( 1 );
+         cuiParents.put( childCui, parents );
+      }
+      final boolean addedParent = parents.add( parentCui );
+      return addedChild || addedParent;
+   }
+
+   static private boolean addSynonym( final String cui1, final String cui2,
+                                      final Map<String, Collection<String>> cuiSynonyms ) {
+      Collection<String> synonyms = cuiSynonyms.get( cui1 );
+      if ( synonyms == null ) {
+         synonyms = new HashSet<String>( 1 );
+         cuiSynonyms.put( cui1, synonyms );
+      }
+      return synonyms.add( cui2 );
+   }
+
+}

Propchange: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsRelationsForCuisReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsSemanticTypeTuiReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsSemanticTypeTuiReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsSemanticTypeTuiReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsSemanticTypeTuiReader.java Fri May  2 17:47:44 2014
@@ -16,9 +16,10 @@ import java.util.List;
  */
 final public class UmlsSemanticTypeTuiReader {
 
-   private UmlsSemanticTypeTuiReader() {}
+   private UmlsSemanticTypeTuiReader() {
+   }
 
-   static private final String SEMANTIC_TYPES_PATH = "C:/Spiffy/App/umls/2013AA/2013AA/META/SemGroups.txt";
+   //   static private final String SEMANTIC_TYPES_PATH = "C:/Spiffy/App/umls/2013AA/2013AA/META/SemGroups.txt";
 
    static private final int SEM_TYPE_ABBR = 0;
    static private final int SEM_TYPE_NAME = 1;
@@ -26,7 +27,7 @@ final public class UmlsSemanticTypeTuiRe
    static private final int SEM_TUI_NAME = 3;
 
    static public Collection<String> readSemanticTypeTuis( final String typeTuisPath,
-                                                           final Collection<String> semanticTypes ) {
+                                                          final Collection<String> semanticTypes ) {
       System.out.println( "Reading Tuis for Semantic Types from " + typeTuisPath );
       final Collection<String> typeTuis = new HashSet<String>();
       final Collection<String> usedTypes = new HashSet<String>( semanticTypes.size() );

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java Fri May  2 17:47:44 2014
@@ -2,7 +2,6 @@ package org.apache.ctakes.dictionarytool
 
 import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
 import org.apache.ctakes.dictionarytool.util.FileUtil;
-import org.apache.ctakes.dictionarytool.util.RRF_INDEX;
 import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
 
 import java.io.BufferedReader;
@@ -12,6 +11,11 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.CUI;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.LANGUAGE;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.STATUS;
+import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.TEXT;
+
 /**
  * Author: SPF
  * Affiliation: CHIP-NLP
@@ -19,15 +23,21 @@ import java.util.Map;
  */
 final public class UmlsTextsForCuisReader {
 
-//   static private final String RRF_PATH = "C:/Spiffy/App/umls/2013AA/2013AA/META/MRCONSO.RRF";
-
 
    private UmlsTextsForCuisReader() {
    }
 
    static public Map<String, Collection<String>> readTextsForCuis( final String rrfPath,
-                                                                    final Collection<String> wantedCuis,
-                                                                    final UmlsTermUtil umlsTermUtil ) {
+                                                                   final Collection<String> wantedCuis,
+                                                                   final UmlsTermUtil umlsTermUtil ) {
+      return readTextsForCuis( rrfPath, wantedCuis, umlsTermUtil, false, true );
+   }
+
+   static public Map<String, Collection<String>> readTextsForCuis( final String rrfPath,
+                                                                   final Collection<String> wantedCuis,
+                                                                   final UmlsTermUtil umlsTermUtil,
+                                                                   final boolean preferredOnly,
+                                                                   final boolean extractAbbreviations ) {
       System.out.println( "Compiling map of Umls Cuis and Texts" );
       long lineCount = 0;
       long textCount = 0;
@@ -37,11 +47,13 @@ final public class UmlsTextsForCuisReade
          List<String> tokens = FileUtil.readBsvTokens( reader, rrfPath );
          while ( tokens != null ) {
             lineCount++;
-            if ( tokens.size() > RRF_INDEX.TEXT._index && tokens.get( RRF_INDEX.LANGUAGE._index ).equals( "ENG" ) ) {
-               final String cui = CuiTuiUtil.getAsCui( tokens.get( RRF_INDEX.CUI._index ) );
+            if ( tokens.size() > TEXT._index
+                  && tokens.get( LANGUAGE._index ).equals( "ENG" )
+                  && (!preferredOnly || tokens.get( STATUS._index ).equals( "P" )) ) {
+               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI._index ) );
                if ( wantedCuis.contains( cui ) ) {
-                  String text = tokens.get( RRF_INDEX.TEXT._index );
-                  Collection<String> formattedTexts = umlsTermUtil.getFormattedTexts( text );
+                  String text = tokens.get( TEXT._index );
+                  Collection<String> formattedTexts = umlsTermUtil.getFormattedTexts( text, extractAbbreviations );
                   if ( formattedTexts == null || formattedTexts.isEmpty() ) {
                      tokens = FileUtil.readBsvTokens( reader, rrfPath );
                      continue;

Modified: ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTuisForCuisReader.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTuisForCuisReader.java?rev=1591985&r1=1591984&r2=1591985&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTuisForCuisReader.java (original)
+++ ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTuisForCuisReader.java Fri May  2 17:47:44 2014
@@ -11,6 +11,9 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
+import static org.apache.ctakes.dictionarytool.util.index.MrstyIndex.CUI;
+import static org.apache.ctakes.dictionarytool.util.index.MrstyIndex.TUI;
+
 /**
  * Author: SPF
  * Affiliation: CHIP-NLP
@@ -21,11 +24,6 @@ final public class UmlsTuisForCuisReader
    private UmlsTuisForCuisReader() {
    }
 
-//   static private final String CUI_TUI_PATH = "C:/Spiffy/App/umls/2013AA/2013AA/META/MRSTY.RRF";
-
-   static private final int CUI_INDEX = 0;
-   static private final int TUI_INDEX = 1;
-
    static public Map<String, Collection<String>> readUmlsTuisForCuis( final String cuiTuiMapPath,
                                                                       final Collection<String> cuis ) {
       System.out.println( "Compiling list of Tuis for wanted Cuis using " + cuiTuiMapPath );
@@ -37,13 +35,13 @@ final public class UmlsTuisForCuisReader
          List<String> tokens = FileUtil.readBsvTokens( reader, cuiTuiMapPath );
          while ( tokens != null ) {
             lineCount++;
-            if ( tokens.size() > TUI_INDEX ) {
-               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI_INDEX ) );
+            if ( tokens.size() > TUI._index ) {
+               final String cui = CuiTuiUtil.getAsCui( tokens.get( CUI._index ) );
                if ( !cuis.contains( cui ) ) {
                   tokens = FileUtil.readBsvTokens( reader, cuiTuiMapPath );
                   continue;
                }
-               final String tui = CuiTuiUtil.getAsTui( tokens.get( TUI_INDEX ) );
+               final String tui = CuiTuiUtil.getAsTui( tokens.get( TUI._index ) );
                Collection<String> tuis = cuisAndTuis.get( cui );
                if ( tuis == null ) {
                   tuis = new HashSet<String>( 1 );



Mime
View raw message