ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1719760 [3/4] - in /ctakes/sandbox/dictionary-gui: data/ data/default/ data/optional/ data/small/ data/tim/ data/tiny/ lib/ src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/ src/main/java/org/apache/ctakes/dictionary/creator/g...
Date Sun, 13 Dec 2015 04:27:44 GMT
Added: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java?rev=1719760&view=auto
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java (added)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/ctakes/DictionaryXmlWriter.java Sun Dec 13 04:27:42 2015
@@ -0,0 +1,132 @@
+package org.apache.ctakes.dictionary.creator.gui.ctakes;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Vocabulary;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.*;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+public class DictionaryXmlWriter {
+
+   static private final Logger LOGGER = LogManager.getLogger( "DictionaryXmlWriter" );
+
+   /**
+    * Writes the lookup specification file databaseName + ".xml" into databaseDir as UTF-8, matching its XML declaration.
+    * Returns true on success; logs the IOException and returns false on failure.
+    */
+   static public boolean writeXmlFile( final String databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".xml" );
+      // Shared by the dictionary and the concept factory so that the two jdbcUrl properties cannot diverge.
+      final String jdbcUrl = "jdbc:hsqldb:file:resources/org/apache/ctakes/dictionary/lookup/fast/" + databaseName + "/" + databaseName;
+      try ( final Writer writer = new BufferedWriter( new OutputStreamWriter( new FileOutputStream( scriptFile ), java.nio.charset.StandardCharsets.UTF_8 ) ) ) {
+         writer.write( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
+         writer.write( "<!--\n" );
+         writer.write( "Licensed to the Apache Software Foundation (ASF) under one\n" );
+         writer.write( "or more contributor license agreements.  See the NOTICE file\n" );
+         writer.write( "distributed with this work for additional information\n" );
+         writer.write( "regarding copyright ownership.  The ASF licenses this file\n" );
+         writer.write( "to you under the Apache License, Version 2.0 (the\n" );
+         writer.write( "\"License\"); you may not use this file except in compliance\n" );
+         writer.write( "with the License.  You may obtain a copy of the License at\n" );
+         writer.write( "http://www.apache.org/licenses/LICENSE-2.0\n" );
+         writer.write( "Unless required by applicable law or agreed to in writing,\n" );
+         writer.write( "software distributed under the License is distributed on an\n" );
+         writer.write( "\"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n" );
+         writer.write( "KIND, either express or implied.  See the License for the\n" );
+         writer.write( "specific language governing permissions and limitations\n" );
+         writer.write( "under the License.\n" );
+         writer.write( "-->\n\n" );
+         writer.write( "<!--    New format for the .xml lookup specification.  Uses table name and value type/class for Concept Factories.  -->\n" );
+         writer.write( "<lookupSpecification>\n" );
+         writer.write( "<dictionaries>\n" );
+         writer.write( "   <dictionary>\n" );
+         writer.write( "      <name>" + databaseName + "Terms</name>\n" );
+         writer.write( "      <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.UmlsJdbcRareWordDictionary</implementationName>\n" );
+         writer.write( "      <properties>\n" );
+         writer.write( "<!-- urls for hsqldb memory connections must be file types in hsql 1.8.\n" );
+         writer.write( "These file urls must be either absolute path or relative to current working directory.\n" );
+         writer.write( "They cannot be based upon the classpath.\n" );
+         writer.write( "Though JdbcConnectionFactory will attempt to \"find\" a db based upon the parent dir of the url\n" );
+         writer.write( "for the sake of ide ease-of-use, the user should be aware of these hsql limitations.\n" );
+         writer.write( "-->\n" );
+         writer.write( createProperty( "jdbcDriver", "org.hsqldb.jdbcDriver" ) );
+         writer.write( createProperty( "jdbcUrl", jdbcUrl ) );
+         writer.write( createProperty( "jdbcUser", "sa" ) );
+         writer.write( createProperty( "jdbcPass", "" ) );
+         writer.write( createProperty( "rareWordTable", "cui_terms" ) );
+         writer.write( createProperty( "umlsUrl", "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser" ) );
+         writer.write( createProperty( "umlsVendor", "NLM-6515182895" ) );
+         writer.write( createProperty( "umlsUser", "CHANGE_ME" ) );
+         writer.write( createProperty( "umlsPass", "CHANGE_ME" ) );
+         writer.write( "      </properties>\n" );
+         writer.write( "   </dictionary>\n" );
+         writer.write( "</dictionaries>\n\n" );
+         writer.write( "<conceptFactories>\n" );
+         writer.write( "   <conceptFactory>\n" );
+         writer.write( "      <name>" + databaseName + "Concepts</name>\n" );
+         writer.write( "      <implementationName>org.apache.ctakes.dictionary.lookup2.concept.UmlsJdbcConceptFactory</implementationName>\n" );
+         writer.write( "      <properties>\n" );
+         writer.write( createProperty( "jdbcDriver", "org.hsqldb.jdbcDriver" ) );
+         writer.write( createProperty( "jdbcUrl", jdbcUrl ) );
+         writer.write( createProperty( "jdbcUser", "sa" ) );
+         writer.write( createProperty( "jdbcPass", "" ) );
+         writer.write( createProperty( "rareWordTable", "cui_terms" ) );
+         writer.write( createProperty( "umlsUrl", "https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser" ) );
+         writer.write( createProperty( "umlsVendor", "NLM-6515182895" ) );
+         writer.write( createProperty( "umlsUser", "CHANGE_ME" ) );
+         writer.write( createProperty( "umlsPass", "CHANGE_ME" ) );
+         writer.write( createProperty( "tuiTable", "tui" ) );
+         writer.write( createProperty( "prefTermTable", "prefTerm" ) );
+         writer.write( "<!-- Optional tables for optional term info.\n" );
+         writer.write( "Uncommenting these lines alone may not persist term information;\n" );
+         writer.write( "persistence depends upon the TermConsumer.  -->\n" );
+         for ( String vocabulary : Vocabulary.getInstance().getAllVocabularies() ) {
+            writer.write( createProperty( vocabulary.toLowerCase().replace( '.', '_' ) + "Table", Vocabulary.getInstance().getCtakesClass( vocabulary ) ) );
+         }
+         writer.write( "      </properties>\n" );
+         writer.write( "   </conceptFactory>\n" );
+         writer.write( "</conceptFactories>\n\n" );
+         writer.write( "<!--  Defines what terms and concepts will be used  -->\n" );
+         writer.write( "<dictionaryConceptPairs>\n" );
+         writer.write( "   <dictionaryConceptPair>\n" );
+         writer.write( "      <name>" + databaseName + "Pair</name>\n" );
+         writer.write( "      <dictionaryName>" + databaseName + "Terms</dictionaryName>\n" );
+         writer.write( "      <conceptFactoryName>" + databaseName + "Concepts</conceptFactoryName>\n" );
+         writer.write( "   </dictionaryConceptPair>\n" );
+         writer.write( "</dictionaryConceptPairs>\n" );
+         writer.write( "\n" );
+         writer.write( "<!-- DefaultTermConsumer will persist all spans.\n" );
+         writer.write( "PrecisionTermConsumer will only persist only the longest overlapping span of any semantic group.\n" );
+         writer.write( "SemanticCleanupTermConsumer works as Precision** but also removes signs/sympoms contained within disease/disorder,\n" );
+         writer.write( "and (just in case) removes any s/s and d/d that are also (exactly) anatomical sites. -->\n" );
+         writer.write( "<rareWordConsumer>\n" );
+         writer.write( "   <name>Term Consumer</name>\n" );
+         writer.write( "   <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>\n" );
+         writer.write( "   <!--<implementationName>org.apache.ctakes.dictionary.lookup2.consumer.PrecisionTermConsumer</implementationName>-->\n" );
+         writer.write( "   <!--<implementationName>org.apache.ctakes.dictionary.lookup2.consumer.SemanticCleanupTermConsumer</implementationName>-->\n" );
+         writer.write( "   <properties>\n" );
+         writer.write( "<!-- Depending upon the consumer, the value of codingScheme may or may not be used.  With the packaged consumers,\n" );
+         writer.write( "codingScheme is a default value used only for cuis that do not have secondary codes (snomed, rxnorm, etc.)  -->\n" );
+         writer.write( createProperty( "codingScheme", databaseName ) );
+         writer.write( "   </properties>\n" );
+         writer.write( "</rareWordConsumer>\n" );
+         writer.write( "\n" );
+         writer.write( "</lookupSpecification>\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage(), ioE );
+         return false;
+      }
+      return true;
+   }
+
+   /** Formats one indented property element line from a key and a value; neither is XML-escaped. */
+   static private String createProperty( final String name, final String value ) {
+      return "         <property key=\"" + name + "\" value=\"" + value + "\"/>\n";
+   }
+}

Modified: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java?rev=1719760&r1=1719759&r2=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/main/MainPanel.java Sun Dec 13 04:27:42 2015
@@ -1,15 +1,27 @@
 package org.apache.ctakes.dictionary.creator.gui.main;
 
+import org.apache.ctakes.dictionary.creator.gui.ctakes.DictionaryBuilder;
+import org.apache.ctakes.dictionary.creator.gui.umls.MrconsoIndex;
 import org.apache.ctakes.dictionary.creator.gui.umls.SourceTableModel;
+import org.apache.ctakes.dictionary.creator.gui.umls.Tui;
 import org.apache.ctakes.dictionary.creator.gui.umls.TuiTableModel;
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import javax.swing.*;
+import javax.swing.border.EmptyBorder;
 import javax.swing.table.TableModel;
+import javax.swing.text.JTextComponent;
 import java.awt.*;
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
 
 /**
  * @author SPF , chip-nlp
@@ -20,23 +32,26 @@ final public class MainPanel extends JPa
 
    static private final Logger LOGGER = LogManager.getLogger( "MainPanel" );
 
+   private String _umlsDirPath = System.getProperty( "user.dir" );
+   private String _ctakesPath = System.getProperty( "user.dir" );
+   final TuiTableModel _tuiModel = new TuiTableModel();
+   final SourceTableModel _sourceModel = new SourceTableModel();
 
    public MainPanel() {
       super( new BorderLayout() );
 
-      final SourceTableModel sourceModel = new SourceTableModel();
-
       final JComponent sourceDirPanel = new JPanel( new GridLayout( 2, 1 ) );
-      sourceDirPanel.add( new DirChooser("Ctakes Installation:", "C:/", new CtakesDirListener() ) );
-      sourceDirPanel.add( new DirChooser( "UMLS Installation:", "C:/", new UmlsDirListener( sourceModel ) ) );
+      sourceDirPanel.add( new DirChooser( "cTAKES Installation:", _umlsDirPath, new CtakesDirListener() ) );
+      sourceDirPanel.add( new DirChooser( "UMLS Installation:", _ctakesPath, new UmlsDirListener() ) );
       add( sourceDirPanel, BorderLayout.NORTH );
 
-      add( createTuiTable(), BorderLayout.WEST );
-      add( createSourceTable( sourceModel ), BorderLayout.EAST );
+      add( createTuiTable( _tuiModel ), BorderLayout.WEST );
+      add( createSourceTable( _sourceModel ), BorderLayout.EAST );
+      add( createGoPanel(), BorderLayout.SOUTH );
    }
 
-   private JComponent createTuiTable() {
-      final JTable tuiTable = new JTable( new TuiTableModel() );
+   private JComponent createTuiTable( final TableModel tuiModel ) {
+      final JTable tuiTable = new JTable( tuiModel );
       tuiTable.setCellSelectionEnabled( false );
       tuiTable.setShowVerticalLines( false );
       tuiTable.setAutoCreateRowSorter( true );
@@ -57,25 +72,152 @@ final public class MainPanel extends JPa
       return new JScrollPane( tuiTable );
    }
 
+   private JComponent createGoPanel() {
+      // Label on the left, editable dictionary name in the middle, build button on the right.
+      final JLabel nameLabel = new JLabel( "Dictionary Name:" );
+      nameLabel.setHorizontalAlignment( SwingConstants.TRAILING );
+      nameLabel.setPreferredSize( new Dimension( 100, 0 ) );
+      final JTextField nameField = new JTextField( "custom" );
+      final JPanel goPanel = new JPanel( new BorderLayout( 10, 10 ) );
+      goPanel.setBorder( new EmptyBorder( 2, 10, 2, 10 ) );
+      goPanel.add( nameLabel, BorderLayout.WEST );
+      goPanel.add( nameField, BorderLayout.CENTER );
+      goPanel.add( new JButton( new BuildDictionaryAction( nameField ) ), BorderLayout.EAST );
+      return goPanel;
+   }
 
-   private class UmlsDirListener implements ActionListener {
-      final private SourceTableModel __sourceModel;
-      private UmlsDirListener( final SourceTableModel sourceModel ) {
-         __sourceModel = sourceModel;
+   private String setUmlsDirPath( final String umlsDirPath ) {
+      // Accept either the UMLS root (containing META/MRCONSO.RRF) or the directory that holds MRCONSO.RRF itself.
+      final File metaConso = new File( new File( umlsDirPath, "META" ), "MRCONSO.RRF" );
+      final File directConso = new File( umlsDirPath, "MRCONSO.RRF" );
+      if ( metaConso.isFile() ) {
+         _umlsDirPath = umlsDirPath;
+      } else if ( directConso.isFile() ) {
+         // The data directory itself was chosen: keep its parent and do not report a valid choice as an error.
+         _umlsDirPath = directConso.getAbsoluteFile().getParentFile().getParent();
+      } else {
+         error( "Invalid UMLS Installation", umlsDirPath + " is not a valid path to a UMLS installation" );
+      }
+      return _umlsDirPath;
+   }
+
+   private void loadSources() {
+      SwingUtilities.invokeLater( new SourceLoadRunner( _umlsDirPath ) );
+   }
+
+   private class SourceLoadRunner implements Runnable {
+      private final String __umlsDirPath;
+      private SourceLoadRunner( final String umlsDirPath ) {
+         __umlsDirPath = umlsDirPath;
+      }
+      public void run() {
+         SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) );
+         final File mrConso = new File( __umlsDirPath + "/META", "MRCONSO.RRF" );
+         final String mrConsoPath = mrConso.getPath();
+         LOGGER.info( "Parsing vocabulary types from " + mrConsoPath );
+         final Collection<String> sources = new HashSet<>();
+         try ( final BufferedReader reader = FileUtil.createReader( mrConsoPath ) ) {
+            int lineCount = 0;
+            java.util.List<String> tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+            while ( tokens != null ) {
+               lineCount++;
+               if ( tokens.size() > MrconsoIndex.SOURCE._index ) {
+                  sources.add( tokens.get( MrconsoIndex.SOURCE._index ) );
+               }
+               if ( lineCount % 100000 == 0 ) {
+                  LOGGER.info( "File Line " + lineCount + "\t Vocabularies " + sources.size() );
+               }
+               tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
+            }
+            LOGGER.info( "Parsed " + sources.size() + " vocabulary types" );
+            _sourceModel.setSources( sources );
+         } catch ( IOException ioE ) {
+            error( "Vocabulary Parse Error", ioE.getMessage() );
+         }
+         SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getDefaultCursor() );
+      }
+   }
+
+   private void buildDictionary( final String dictionaryName ) {
+      SwingUtilities.invokeLater(
+            new DictionaryBuildRunner( _umlsDirPath, _ctakesPath, dictionaryName,
+                  _sourceModel.getWantedSources(), _sourceModel.getWantedTargets(), _tuiModel.getWantedTuis() ) );
+   }
+
+   private class DictionaryBuildRunner implements Runnable {
+      private final String __umlsDirPath;
+      private final String __ctakesDirPath;
+      private final String __dictionaryName;
+      private final Collection<String> __wantedSources;
+      private final Collection<String> __wantedTargets;
+      private final Collection<Tui> __wantedTuis;
+      private DictionaryBuildRunner( final String umlsDirPath, final String ctakesDirPath, final String dictionaryName,
+                                     final Collection<String> wantedSources, final Collection<String> wantedTargets,
+                                     final Collection<Tui> wantedTuis ) {
+         __umlsDirPath = umlsDirPath;
+         __ctakesDirPath = ctakesDirPath;
+         __dictionaryName = dictionaryName;
+         __wantedSources = new ArrayList<>( wantedSources );
+         __wantedTargets = new ArrayList<>( wantedTargets );
+         __wantedTuis = new ArrayList<>( wantedTuis );
       }
+      public void run() {
+         SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getPredefinedCursor( Cursor.WAIT_CURSOR ) );
+         if ( DictionaryBuilder.buildDictionary( __umlsDirPath, __ctakesDirPath, __dictionaryName,
+               __wantedSources, __wantedTargets, __wantedTuis ) ) {
+            final String message = "Dictionary " + __dictionaryName + " successfully built in " + __ctakesDirPath;
+            LOGGER.info( message );
+            JOptionPane.showMessageDialog( MainPanel.this, message, "Dictionary Built", JOptionPane.INFORMATION_MESSAGE );
+         } else {
+            error( "Build Failure", "Dictionary " + __dictionaryName + " could not be built in " + __ctakesDirPath );
+         }
+         SwingUtilities.getRoot( MainPanel.this ).setCursor( Cursor.getDefaultCursor() );
+      }
+   }
+
+   private void error( final String title, final String message ) {
+      LOGGER.error( message );
+      JOptionPane.showMessageDialog( MainPanel.this, message, title, JOptionPane.ERROR_MESSAGE );
+   }
+
+   private class UmlsDirListener implements ActionListener {
       public void actionPerformed( final ActionEvent event ) {
-         __sourceModel.initialize( event.getActionCommand() );
+         final String oldPath = _umlsDirPath;
+         final String newPath = setUmlsDirPath( event.getActionCommand() );
+         if ( !oldPath.equals( newPath ) ) {
+            loadSources();
+         }
       }
    }
 
 
    private class CtakesDirListener implements ActionListener {
       public void actionPerformed( final ActionEvent event ) {
-         LOGGER.info( "Directory changed to " + event.getSource().getClass().getName()
-                      + " " + event.getID() + " " + event.getActionCommand() );
+         _ctakesPath = event.getActionCommand();
       }
    }
 
 
+   /**
+    * Action that builds a dictionary named by the lowercased text of a text component, or reports an error if it is empty.
+    */
+   private class BuildDictionaryAction extends AbstractAction {
+      private final JTextComponent __textComponent;
+
+      private BuildDictionaryAction( final JTextComponent textComponent ) {
+         super( "Build Dictionary" );
+         __textComponent = textComponent;
+      }
+
+      @Override
+      public void actionPerformed( final ActionEvent event ) {
+         final String dictionaryName = __textComponent.getText();
+         if ( dictionaryName != null && !dictionaryName.isEmpty() ) {
+            buildDictionary( dictionaryName.toLowerCase() );
+         } else {
+            error( "Invalid Dictionary Name", "Please Specify a Dictionary Name" );
+         }
+      }
+   }
 
 }

Copied: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java (from r1716234, ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java?p2=ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java&p1=ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java&r1=1716234&r2=1719760&rev=1719760&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/concept/DefaultConcept.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Concept.java Sun Dec 13 04:27:42 2015
@@ -1,14 +1,11 @@
-package org.apache.ctakes.dictionary.lookup2.concept;
+package org.apache.ctakes.dictionary.creator.gui.umls;
 
-import org.apache.ctakes.dictionary.lookup2.util.SemanticUtil;
-import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
-import org.apache.ctakes.dictionary.lookup2.util.collection.HashSetMap;
-import org.apache.ctakes.dictionary.lookup2.util.collection.ImmutableCollectionMap;
-import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.dictionary.creator.util.collection.CollectionMap;
+import org.apache.ctakes.dictionary.creator.util.collection.HashSetMap;
 
-import javax.annotation.concurrent.Immutable;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.EnumSet;
 import java.util.HashSet;
 
 /**
@@ -16,65 +13,38 @@ import java.util.HashSet;
  * Affiliation: CHIP-NLP
  * Date: 11/20/13
  */
-@Immutable
-final public class DefaultConcept implements Concept {
+final public class Concept {
 
-   final private String _cui;
-   final private String _preferredText;
+   static public String PREFERRED_TERM_UNKNOWN = "Unknown Preferred Term";
+
+   private String _preferredText = null;
+   final private Collection<String> _texts;
    final private CollectionMap<String, String, ? extends Collection<String>> _codes;
-   final private Collection<Integer> _ctakesSemantics;
+   final private Collection<Tui> _tuis;
 
-   final private int _hashcode;
 
-   /**
-    * @param cui -
-    */
-   public DefaultConcept( final String cui ) {
-      this( cui, "" );
-   }
-
-   /**
-    * @param cui           -
-    * @param preferredText -
-    */
-   public DefaultConcept( final String cui, final String preferredText ) {
-      this( cui, preferredText, new HashSetMap<String, String>() );
-   }
-
-   /**
-    * @param cui           -
-    * @param preferredText -
-    * @param codes         collection of coding scheme names and this concept's codes for those schemes
-    */
-   public DefaultConcept( final String cui, final String preferredText,
-                          final CollectionMap<String, String, ? extends Collection<String>> codes ) {
-      _cui = cui;
-      _preferredText = preferredText;
-      _codes = new ImmutableCollectionMap<>( codes );
-      final Collection<Integer> ctakesSemantics = new HashSet<>();
-      for ( String tui : getCodes( TUI ) ) {
-         // Attempt to obtain one or more valid type ids from the tuis of the term
-         ctakesSemantics.add( SemanticUtil.getTuiSemanticGroupId( tui ) );
-      }
-      if ( ctakesSemantics.isEmpty() ) {
-         ctakesSemantics.add( CONST.NE_TYPE_ID_UNKNOWN );
-      }
-      _ctakesSemantics = Collections.unmodifiableCollection( ctakesSemantics );
-      _hashcode = cui.hashCode();
+   public Concept() {
+      _codes = new HashSetMap<>( 0 );
+      _texts = new HashSet<>( 1 );
+      _tuis = EnumSet.noneOf( Tui.class );
+   }
+
+   public void addTexts( final Collection<String> texts ) {
+      _texts.addAll( texts );
+   }
+
+   public void removeText( final String text ) {
+      _texts.remove( text );
+   }
+
+   public Collection<String> getTexts() {
+      return _texts;
+   }
+
+   public void setPreferredText( final String text ) {
+      _preferredText = text;
    }
 
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public String getCui() {
-      return _cui;
-   }
-
-   /**
-    * {@inheritDoc}
-    */
-   @Override
    public String getPreferredText() {
       if ( _preferredText != null ) {
          return _preferredText;
@@ -82,53 +52,42 @@ final public class DefaultConcept implem
       return PREFERRED_TERM_UNKNOWN;
    }
 
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public Collection<String> getCodeNames() {
+   public void addCode( final String vocabulary, final String code ) {
+      _codes.placeValue( vocabulary, code );
+      Vocabulary.getInstance().addVocabulary( vocabulary, code );
+   }
+
+   public Collection<String> getVocabularies() {
       return _codes.keySet();
    }
 
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public Collection<String> getCodes( final String codeType ) {
-      return _codes.getCollection( codeType );
-   }
-
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public Collection<Integer> getCtakesSemantics() {
-      return _ctakesSemantics;
-   }
-
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public boolean isEmpty() {
-      return (_preferredText == null || _preferredText.isEmpty()) && _codes.isEmpty();
+   public Collection<String> getCodes( final String vocabulary ) {
+      final Collection<String> codes = _codes.getCollection( vocabulary );
+      if ( codes == null ) {
+         return Collections.emptyList();
+      }
+      return codes;
    }
 
+   public void addTui( final Tui tui ) {
+      _tuis.add( tui );
+   }
 
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public boolean equals( final Object value ) {
-      return value instanceof Concept && _cui.equals( ((DefaultConcept)value)._cui );
-   }
-
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public int hashCode() {
-      return _hashcode;
+   public Collection<Tui> getTuis() {
+      return _tuis;
+   }
+
+   public void mergeWith( final Concept concept ) {
+      addTexts( concept.getTexts() );
+      concept.getTuis().stream().forEach( this::addTui );
+      if ( _preferredText == null || _preferredText.isEmpty() ) {
+         setPreferredText( concept.getPreferredText() );
+      }
+      for ( String vocabulary : concept.getVocabularies() ) {
+         for ( String code : concept.getCodes( vocabulary ) ) {
+            addCode( vocabulary, code );
+         }
+      }
    }
 
 }

Added: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java?rev=1719760&view=auto
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java (added)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/ConceptMapFactory.java Sun Dec 13 04:27:42 2015
@@ -0,0 +1,62 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+public class ConceptMapFactory {
+
+   static private final Logger LOGGER = LogManager.getLogger( "ConceptMapFactory" );
+
+   /** Creates concepts for the wanted Tuis and keeps those whose Cui is valid for the wanted sources; tuiTypes only labels log messages. */
+   static public Map<Long,Concept> createConceptMap( final String umlsDirPath,
+                                                      final Collection<String> wantedSources,
+                                                      final Collection<Tui> wantedTuis,
+                                                      final String tuiTypes ) {
+      if ( wantedTuis.isEmpty() ) {
+         LOGGER.warn( "No valid " + tuiTypes + " Tuis" );
+         return Collections.emptyMap();
+      }
+
+      // get the valid Cuis for all wanted Tuis
+      final Map<Long, Concept> concepts = MrstyParser.createConceptsForTuis( umlsDirPath, wantedTuis );
+      if ( concepts.isEmpty() ) {
+         LOGGER.warn( "No valid " + tuiTypes + " Tuis" );
+         return Collections.emptyMap();
+      }
+      // filter out the Cuis that do not belong to the given sources
+      final Collection<Long> validVocabularyCuis = MrconsoParser.getValidVocabularyCuis( umlsDirPath, wantedSources );
+      concepts.keySet().retainAll( validVocabularyCuis );
+      LOGGER.info( "Total Valid Cuis " + concepts.size() + "\t from wanted Tuis and Vocabularies" );
+      return concepts;
+   }
+   /** Creates concepts for the wanted Tuis and keeps only the Cuis returned by MrconsoParser.getValidRxNormCuis. */
+   static public Map<Long,Concept> createRxConceptMap( final String umlsDirPath,
+                                                      final Collection<Tui> wantedTuis ) {
+      if ( wantedTuis.isEmpty() ) {
+         LOGGER.warn( "No valid Medication Tuis" );
+         return Collections.emptyMap();
+      }
+      // get the valid Cuis for all wanted Tuis
+      final Map<Long, Concept> concepts = MrstyParser.createConceptsForTuis( umlsDirPath, wantedTuis );
+      if ( concepts.isEmpty() ) {
+         LOGGER.warn( "No valid Medication Tuis" );
+         return Collections.emptyMap();
+      }
+      // keep only the Cuis that MrconsoParser.getValidRxNormCuis returns; no source list is consulted here
+      final Collection<Long> validVocabularyCuis = MrconsoParser.getValidRxNormCuis( umlsDirPath );
+      concepts.keySet().retainAll( validVocabularyCuis );
+      LOGGER.info( "Total Valid Medication Cuis " + concepts.size() + "\t from wanted Tuis and Vocabularies" );
+      return concepts;
+   }
+
+}

Copied: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java (from r1719002, ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java?p2=ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java&p1=ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java&r1=1719002&r2=1719760&rev=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiCodeUtil.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/CuiCodeUtil.java Sun Dec 13 04:27:42 2015
@@ -1,4 +1,4 @@
-package org.apache.ctakes.dictionarytool.util;
+package org.apache.ctakes.dictionary.creator.gui.umls;
 
 
 import java.util.ArrayList;
@@ -23,7 +23,7 @@ public enum CuiCodeUtil {
 
    final private List<PrefixerPair> _prefixerPairList = new ArrayList<>();
 
-   private CuiCodeUtil() {
+   CuiCodeUtil() {
       // Add the standard C as the default encoding prefix
       _prefixerPairList.add( new PrefixerPair( "C0000000" ) );
    }

Copied: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java (from r1719002, ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java?p2=ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java&p1=ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java&r1=1719002&r2=1719760&rev=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/reader/UmlsTextsForCuisReader.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrconsoParser.java Sun Dec 13 04:27:42 2015
@@ -1,104 +1,193 @@
-package org.apache.ctakes.dictionarytool.reader;
+package org.apache.ctakes.dictionary.creator.gui.umls;
 
-import org.apache.ctakes.dictionarytool.util.CuiTuiUtil;
-import org.apache.ctakes.dictionarytool.util.FileUtil;
-import org.apache.ctakes.dictionarytool.util.UmlsTermUtil;
-import org.apache.ctakes.dictionarytool.util.collection.HashSetMap;
-import org.apache.ctakes.dictionarytool.util.index.MrconsoIndex;
+
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 import java.io.BufferedReader;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-
-import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.CUI;
-import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.FORM;
-import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.LANGUAGE;
-import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.STATUS;
-import static org.apache.ctakes.dictionarytool.util.index.MrconsoIndex.TEXT;
+import java.util.*;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrconsoIndex.*;
 
 /**
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 1/17/14
  */
-final public class UmlsTextsForCuisReader {
+final public class MrconsoParser {
+
+   static private final Logger LOGGER = LogManager.getLogger( "MrConsoParser" );
+
+   static private final String MR_CONSO_SUB_PATH = "/META/MRCONSO.RRF";
 
+   //   https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/SNOMEDCT_US/stats.html
+   //   https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/RXNORM/stats.html
+   static private final String[] EXCLUSION_TYPES = { "FN", "CCS", "CA2", "CA3", "PSN", "TMSY",
+                                                     "SBD", "SBDC", "SBDF", "SBDG",
+                                                     "SCD", "SCDC", "SCDF", "SCDG", "BPCK", "GPCK" };
+   static private final String EXCLUSION_RXNORM = "SY";
 
-   private UmlsTextsForCuisReader() {
+   private MrconsoParser() {
    }
 
-   static public HashSetMap<Long, String> readTextsForCuis( final String rrfPath,
-                                                            final Collection<Long> wantedCuis,
-                                                            final UmlsTermUtil umlsTermUtil ) {
-      return readTextsForCuis( rrfPath, wantedCuis, umlsTermUtil,
-            false, true, 1, Integer.MAX_VALUE );
+   static public Map<Long, Concept> parseConcepts( final String umlsDirPath,
+                                                   final Map<Long, Concept> concepts,
+                                                   final Collection<String> wantedTargets,
+                                                   final UmlsTermUtil umlsTermUtil ) {
+      return parseConcepts( umlsDirPath, concepts, wantedTargets, umlsTermUtil, 1, Integer.MAX_VALUE );
    }
 
-   static public HashSetMap<Long, String> readTextsForCuis( final String rrfPath,
-                                                            final Collection<Long> wantedCuis,
-                                                            final UmlsTermUtil umlsTermUtil,
-                                                            final boolean preferredOnly,
-                                                            final boolean extractAbbreviations,
-                                                            final int minWordLength,
-                                                            final int maxWordCount ) {
-      return readTextsForCuis( rrfPath, wantedCuis, umlsTermUtil, new ArrayList<String>(0),
-            preferredOnly, extractAbbreviations, minWordLength, maxWordCount );
+   static public Map<Long, Concept> parseConcepts( final String umlsDirPath,
+                                                   final Map<Long, Concept> concepts,
+                                                   final Collection<String> wantedTargets,
+                                                   final UmlsTermUtil umlsTermUtil,
+                                                   final int minCharLength,
+                                                   final int maxWordCount ) {
+      return parseConcepts( umlsDirPath, concepts, wantedTargets, umlsTermUtil, true, minCharLength, maxWordCount );
+   }
+
+   static public Map<Long, Concept> parseConcepts( final String umlsDirPath,
+                                                   final Map<Long, Concept> concepts,
+                                                   final Collection<String> wantedTargets,
+                                                   final UmlsTermUtil umlsTermUtil,
+                                                   final boolean extractAbbreviations,
+                                                   final int minWordLength,
+                                                   final int maxWordCount ) {
+      return parseConcepts( umlsDirPath, concepts, wantedTargets, umlsTermUtil, Collections.emptyList(),
+            extractAbbreviations, minWordLength, maxWordCount );
    }
 
 
-   static public HashSetMap<Long, String> readTextsForCuis( final String rrfPath,
-                                                            final Collection<Long> wantedCuis,
-                                                            final UmlsTermUtil umlsTermUtil,
-                                                            final Collection<String> unwantedTexts,
-                                                            final boolean preferredOnly,
-                                                            final boolean extractAbbreviations,
-                                                            final int minWordLength,
-                                                            final int maxWordCount ) {
-      System.out.println( "Compiling map of Umls Cuis and Texts" );
+   static public Map<Long, Concept> parseConcepts( final String umlsDirPath,
+                                                   final Map<Long, Concept> concepts,
+                                                   final Collection<String> wantedTargets,
+                                                   final UmlsTermUtil umlsTermUtil,
+                                                   final Collection<String> unwantedTexts,
+                                                   final boolean extractAbbreviations,
+                                                   final int minWordLength,
+                                                   final int maxWordCount ) {
+      final String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
+      LOGGER.info( "Compiling map of Umls Cuis and Texts from " + mrconsoPath );
       long lineCount = 0;
-      long textCount = 0;
-      final HashSetMap<Long, String> cuisAndText = new HashSetMap<>( wantedCuis.size() );
-      try {
-         final BufferedReader reader = FileUtil.createReader( rrfPath );
-         List<String> tokens = FileUtil.readBsvTokens( reader, rrfPath );
+      try ( final BufferedReader reader = FileUtil.createReader( mrconsoPath ) ) {
+         List<String> tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
          while ( tokens != null ) {
             lineCount++;
-            if ( tokens.size() > TEXT._index
-                  && getToken( tokens, LANGUAGE ).equals( "ENG" )
-                  && (!preferredOnly
-                  || (getToken( tokens, STATUS ).equals( "P" ) && getToken( tokens, FORM ).equals( "PF" ))) ) {
-               final Long cuiCode = CuiTuiUtil.getCuiCode( getToken( tokens, CUI ) );
-               if ( wantedCuis.contains( cuiCode ) ) {
-                  String text = getToken( tokens, TEXT );
-                  Collection<String> formattedTexts = umlsTermUtil.getFormattedTexts( text, extractAbbreviations,
-                                                                                      minWordLength, maxWordCount );
-                  if ( formattedTexts == null || formattedTexts.isEmpty() ) {
-                     tokens = FileUtil.readBsvTokens( reader, rrfPath );
-                     continue;
-                  }
-                  formattedTexts.removeAll( unwantedTexts );
-                  textCount += cuisAndText.addAll( cuiCode, formattedTexts );
-               }
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount );
             }
-            if ( lineCount % 2000 == 0 ) {
-               System.out.print( "." );
-               if ( lineCount % 100000 == 0 ) {
-                  System.out.println( "File Line " + lineCount + "\t Terms " + textCount );
+            if ( tokens.size() > TEXT._index && getToken( tokens, LANGUAGE ).equals( "ENG" ) ) {
+               final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( getToken( tokens, CUI ) );
+               final Concept concept = concepts.get( cuiCode );
+               if ( concept == null ) {
+                  tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+                  continue;
                }
+               final String source = getToken( tokens, SOURCE );
+               if ( wantedTargets.contains( source ) ) {
+                  concept.addCode( source, getToken( tokens, SOURCE_CODE ) );
+               }
+               final String text = getToken( tokens, TEXT );
+               if ( getToken( tokens, STATUS ).equals( "P" ) && getToken( tokens, FORM ).equals( "PF" ) ) {
+                  concept.setPreferredText( text );
+               }
+               Collection<String> formattedTexts = umlsTermUtil.getFormattedTexts( text, extractAbbreviations,
+                                                                                   minWordLength, maxWordCount );
+               if ( formattedTexts == null || formattedTexts.isEmpty() ) {
+                  tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+                  continue;
+               }
+               formattedTexts.removeAll( unwantedTexts );
+               concept.addTexts( formattedTexts );
+            }
+            tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+      }
+      LOGGER.info( "File Lines " + lineCount );
+      return concepts;
+   }
+
+
+   /**
+    * Collects the cuis that have an entry in one of the given source vocabularies with a non-excluded term type
+    *
+    * @param umlsDirPath     path to the UMLS root directory that contains META/MRCONSO.RRF
+    * @param sourceVocabularies desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+    * @return Cuis that exist in the given sources
+    */
+   static public Collection<Long> getValidVocabularyCuis( final String umlsDirPath,
+                                                          final Collection<String> sourceVocabularies ) {
+      return getValidVocabularyCuis( umlsDirPath, sourceVocabularies, EXCLUSION_TYPES );
+   }
+
+   /**
+    * Collects the cuis that have an RXNORM entry whose term type is neither a default exclusion nor SY
+    *
+    * @param umlsDirPath     path to the UMLS root directory that contains META/MRCONSO.RRF
+    * @return Cuis that exist in the RXNORM source
+    */
+   static public Collection<Long> getValidRxNormCuis( final String umlsDirPath ) {
+      final String[] exclusionTypes = Arrays.copyOf( EXCLUSION_TYPES, EXCLUSION_TYPES.length + 1 );
+      exclusionTypes[ EXCLUSION_TYPES.length ] = EXCLUSION_RXNORM;
+      return getValidVocabularyCuis( umlsDirPath, Collections.singletonList( "RXNORM" ), exclusionTypes );
+   }
+
+   /**
+    * Collects the cuis that have an entry in one of the given source vocabularies whose term type is not invalid
+    *
+    * @param umlsDirPath     path to the UMLS root directory that contains META/MRCONSO.RRF
+    * @param sourceVocabularies desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+    * @param invalidTypes term type names as appear in rrf: FN, CCS, etc. that are not valid
+    * @return Cuis that exist in the given sources
+    */
+   static private Collection<Long> getValidVocabularyCuis( final String umlsDirPath,
+                                                           final Collection<String> sourceVocabularies,
+                                                           final String... invalidTypes ) {
+      final String mrconsoPath = umlsDirPath + MR_CONSO_SUB_PATH;
+      LOGGER.info( "Compiling list of Cuis with wanted Vocabularies using " + mrconsoPath );
+      final Collection<Long> validCuis = new HashSet<>();
+      long lineCount = 0;
+      try ( final BufferedReader reader = FileUtil.createReader( mrconsoPath ) ) {
+         List<String> tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
+         while ( tokens != null ) {
+            lineCount++;
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "\t Valid Cuis " + validCuis.size() );
+            }
+            if ( tokens.size() > SOURCE._index
+                 && sourceVocabularies.stream().anyMatch( getToken( tokens, SOURCE )::equals )
+                 && Arrays.stream( invalidTypes ).noneMatch( getToken( tokens, TERM_TYPE )::equals ) ) {
+               final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( getToken( tokens, CUI ) );
+               validCuis.add( cuiCode );
             }
-            tokens = FileUtil.readBsvTokens( reader, rrfPath );
+            tokens = FileUtil.readBsvTokens( reader, mrconsoPath );
          }
-         reader.close();
       } catch ( IOException ioE ) {
-         System.err.println( ioE.getMessage() );
+         LOGGER.error( ioE.getMessage() );
       }
-      System.out.println( "File Line " + lineCount + "\t Terms " + textCount );
-      return cuisAndText;
+      LOGGER.info( "File Lines " + lineCount + "\t Valid Cuis " + validCuis.size() + "\t for wanted Vocabularies" );
+      return validCuis;
    }
 
+//   /**
+//    * Given a collection of cuis, returns all of the cuis that don't exist for the given source types
+//    *
+//    * @param rrfPath     path to the UMLS_ROOT Meta/MRCONSO.RRF file
+//    * @param sourceTypes desired source type names as appear in rrf: RXNORM, SNOMEDCT, MSH, etc.
+//    * @param cuiCodes    current list of cui codes
+//    * @return Subset of cuis that don't exist in the given sources
+//    */
+//   static public Collection<Long> getSourceTypeInvalidCuis( final String rrfPath,
+//                                                            final Collection<String> sourceTypes,
+//                                                            final Collection<Long> cuiCodes ) {
+//      final Collection<Long> validCuis = getSourceTypeValidCuis( rrfPath, sourceTypes, cuiCodes );
+//      final Predicate<Long> validCui = validCuis::contains;
+//      return cuiCodes.stream().filter( validCui.negate() ).collect( Collectors.toSet() );
+//   }
 
    static private String getToken( final List<String> tokens, final MrconsoIndex mrconsoIndex ) {
       return tokens.get( mrconsoIndex._index );

Copied: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java (from r1719001, ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiTuiUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java?p2=ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java&p1=ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiTuiUtil.java&r1=1719001&r2=1719760&rev=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/CuiTuiUtil.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/MrstyParser.java Sun Dec 13 04:27:42 2015
@@ -1,164 +1,74 @@
-package org.apache.ctakes.dictionarytool.util;
+package org.apache.ctakes.dictionary.creator.gui.umls;
 
-import org.apache.ctakes.dictionarytool.reader.UmlsCuisForTuisReader;
-import org.apache.ctakes.dictionarytool.util.collection.HashSetMap;
 
-import java.util.Collection;
-import java.util.HashSet;
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.util.*;
+
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrstyIndex.CUI;
+import static org.apache.ctakes.dictionary.creator.gui.umls.MrstyIndex.TUI;
 
-import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TERM_MAP;
-import static org.apache.ctakes.dictionarytool.util.UmlsFileName.CUI_TUI_MAP;
 
 /**
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 1/17/14
  */
-final public class CuiTuiUtil {
-
-   private CuiTuiUtil() {
-   }
-
-   static public String getAsCui( final Long code ) {
-      final StringBuilder sb = new StringBuilder( 8 );
-      sb.append( code );
-      return getAsCui( sb );
-   }
-
-   static public String getAsCui( final String code ) {
-      if ( code.length() == 8 && code.startsWith( "C" ) ) {
-         return code;
-      }
-      final StringBuilder sb = new StringBuilder( 8 );
-      sb.append( code.replace( "C", "" ) );
-      return getAsCui( sb );
-   }
-
-   static private String getAsCui( final StringBuilder sb ) {
-      while ( sb.length() < 7 ) {
-         sb.insert( 0, '0' );
-      }
-      sb.insert( 0, 'C' );
-      return sb.toString();
-   }
-
-
-   static public Long getCuiCode( final String cui ) {
-      final String cuiText = getAsCui( cui );
-      final String cuiNum = cuiText.substring( 1, cuiText.length() );
-      try {
-         return Long.parseLong( cuiNum );
-      } catch ( NumberFormatException nfE ) {
-         System.err.println( "Could not create Cui Code for " + cui );
-      }
-      return -1l;
-   }
-
-   static public Collection<Long> getCuiCodes( final Collection<String> cuis ) {
-      final Collection<Long> cuiCodes = new HashSet<>( cuis.size() );
-      for ( String cui : cuis ) {
-         cuiCodes.add( getCuiCode( cui ) );
-      }
-      return cuiCodes;
-   }
+final public class MrstyParser {
 
-   static public String getAsTui( final Integer code ) {
-      final StringBuilder sb = new StringBuilder( 4 );
-      sb.append( code );
-      return getAsTui( sb );
-   }
-
-   static public String getAsTui( final String code ) {
-      if ( code.length() == 4 && code.startsWith( "T" ) ) {
-         return code;
-      }
-      final StringBuilder sb = new StringBuilder( 4 );
-      sb.append( code.replace( "T", "" ) );
-      return getAsTui( sb );
-   }
-
-   static private String getAsTui( final StringBuilder sb ) {
-      while ( sb.length() < 3 ) {
-         sb.insert( 0, '0' );
-      }
-      sb.insert( 0, 'T' );
-      return sb.toString();
-   }
+   static private final Logger LOGGER = LogManager.getLogger( "MrStyParser" );
 
+   static private final String MRSTY_SUB_PATH = "/META/MRSTY.RRF";
 
-   static public Collection<String> getIntAsTuis( final Collection<Integer> tuiCodes ) {
-      final Collection<String> tuis = new HashSet<>( tuiCodes.size() );
-      for ( Integer tuiCode : tuiCodes ) {
-         tuis.add( getAsTui( tuiCode ) );
-      }
-      return tuis;
+   private MrstyParser() {
    }
 
-   static public Collection<String> getStringAsTuis( final Collection<String> tuiNums ) {
-      final Collection<String> tuis = new HashSet<>( tuiNums.size() );
-      for ( String tuiNum : tuiNums ) {
-         tuis.add( getAsTui( tuiNum ) );
-      }
-      return tuis;
-   }
-
-   static public Integer getTuiCode( final String tui ) {
-      final String tuiText = getAsTui( tui );
-      final String tuiNum = tuiText.substring( 1, tuiText.length() );
-      try {
-         return Integer.parseInt( tuiNum );
-      } catch ( NumberFormatException nfE ) {
-         System.err.println( "Could not create Tui Code for " + tui );
-      }
-      return -1;
-   }
-
-   static public HashSetMap<Long, Integer> getValidCuisAndTuis( final String umlsPath,
-                                                                final Collection<String> wantedSources,
-                                                                final Collection<Integer> wantedTuis ) {
-      // get all the Cuis for the wanted Tuis.  Key = Cui, Value = Tuis to which the Cui belongs
-      final HashSetMap<Long, Integer> wantedCuisAndTuis
-            = UmlsCuisForTuisReader.readUmlsCuisForTuis( umlsPath + '/' + CUI_TUI_MAP._filename, wantedTuis );
-      if ( wantedSources.isEmpty() ) {
-         // No specified source types, assume that all sources are valid
-         return wantedCuisAndTuis;
-      }
-      // filter out the Cuis that do not belong to the given sources
-      final Collection<Long> validCuis
-            = UmlsSourceTypeCuiValidator.getSourceTypeValidCuis( umlsPath + '/' + CUI_TERM_MAP._filename,
-                                                                 wantedSources,
-                                                                 wantedCuisAndTuis.keySet() );
-      // Key = Cui, Value = Tuis to which the Cui belongs
-      final HashSetMap<Long, Integer> validCuisAndTuis = new HashSetMap<>();
-      for ( Long validCui : validCuis ) {
-         validCuisAndTuis.addAll( validCui, wantedCuisAndTuis.get( validCui ) );
-      }
-      return validCuisAndTuis;
-   }
-
-   static public HashSetMap<Long, Integer> getValidCuisAndTuis( final String umlsPath,
-                                                                final Collection<String> wantedSources,
-                                                                final Collection<Integer> wantedTuis,
-                                                                final Collection<String> termTypes ) {
-      // get all the Cuis for the wanted Tuis.  Key = Cui, Value = Tuis to which the Cui belongs
-      final HashSetMap<Long, Integer> wantedCuisAndTuis
-            = UmlsCuisForTuisReader.readUmlsCuisForTuis( umlsPath + '/' + CUI_TUI_MAP._filename, wantedTuis );
-      if ( wantedSources.isEmpty() ) {
-         // No specified source types, assume that all sources are valid
-         return wantedCuisAndTuis;
-      }
-      // filter out the Cuis that do not belong to the given sources
-      final Collection<Long> validCuis
-            = UmlsSourceTypeCuiValidator.getSourceTypeValidCuis( umlsPath + '/' + CUI_TERM_MAP._filename,
-                                                                 wantedSources,
-                                                                 wantedCuisAndTuis.keySet(),
-                                                                 termTypes );
-      // Key = Cui, Value = Tuis to which the Cui belongs
-      final HashSetMap<Long, Integer> validCuisAndTuis = new HashSetMap<>();
-      for ( Long validCui : validCuis ) {
-         validCuisAndTuis.addAll( validCui, wantedCuisAndTuis.get( validCui ) );
+   static public Map<Long, Concept> createConceptsForTuis( final String umlsPath,
+                                                           final Collection<Tui> wantedTuis ) {
+      final String mrstyPath = umlsPath + MRSTY_SUB_PATH;
+      LOGGER.info( "Compiling list of Cuis with wanted Tuis using " + mrstyPath );
+      long lineCount = 0;
+      final Map<Long,Concept> wantedConcepts = new HashMap<>();
+      final Collection<Tui> usedTuis = new HashSet<>( wantedTuis.size() );
+      try (final BufferedReader reader = FileUtil.createReader( mrstyPath ) ) {
+         List<String> tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+         while ( tokens != null ) {
+            lineCount++;
+            if ( tokens.size() > TUI._index ) {
+               final Tui tuiEnum = Tui.valueOf( tokens.get( TUI._index ) );
+               if ( !wantedTuis.contains( tuiEnum ) ) {
+                  tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+                  continue;
+               }
+               final Long cuiCode = CuiCodeUtil.getInstance().getCuiCode( tokens.get( CUI._index ) );
+               Concept concept = wantedConcepts.get( cuiCode );
+               if ( concept == null ) {
+                  concept = new Concept();
+                  wantedConcepts.put( cuiCode, concept );
+               }
+               concept.addTui( tuiEnum );
+               usedTuis.add( tuiEnum );
+            }
+            if ( lineCount % 100000 == 0 ) {
+               LOGGER.info( "File Line " + lineCount + "\t Valid Cuis " + wantedConcepts.size() );
+            }
+            tokens = FileUtil.readBsvTokens( reader, mrstyPath );
+         }
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage() );
+      }
+      LOGGER.info( "File Lines " + lineCount + "\t Valid Cuis " + wantedConcepts.size() + "\t for wanted Tuis" );
+      if ( usedTuis.size() != wantedTuis.size() ) {
+         wantedTuis.removeAll( usedTuis );
+         for ( Tui missingTui : wantedTuis ) {
+            LOGGER.warn( "Could not find Cuis for Tui " + missingTui + " " + missingTui.getDescription() );
+         }
       }
-      return validCuisAndTuis;
+      return wantedConcepts;
    }
 
 }

Modified: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java?rev=1719760&r1=1719759&r2=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/SourceTableModel.java Sun Dec 13 04:27:42 2015
@@ -1,7 +1,6 @@
 package org.apache.ctakes.dictionary.creator.gui.umls;
 
 
-import org.apache.ctakes.dictionary.creator.util.FileUtil;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -9,9 +8,6 @@ import javax.swing.event.EventListenerLi
 import javax.swing.event.TableModelEvent;
 import javax.swing.event.TableModelListener;
 import javax.swing.table.TableModel;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
 import java.util.*;
 
 /**
@@ -23,66 +19,36 @@ final public class SourceTableModel impl
 
    static private final Logger LOGGER = LogManager.getLogger( "SourceTableModel" );
 
-   static private final String[] COLUMN_NAMES = { "Source", "Target", "Library" };
+   static private final String[] COLUMN_NAMES = { "Source", "Target", "Vocabulary" };
    static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, Boolean.class, String.class };
 
    static private final String[] CTAKES_SOURCES = { "SNOMEDCT", "SNOMEDCT_US", "RXNORM" };
 
    private final EventListenerList _listenerList = new EventListenerList();
-   private final Map<String,Boolean> _useSource = new HashMap<>();
-   private final Map<String,Boolean> _useTarget = new HashMap<>();
+   private final Collection<String> _wantedSources = new HashSet<>();
+   private final Collection<String> _wantedTargets = new HashSet<>();
    private final List<String> _sources = new ArrayList<>();
 
 
 
-   public void initialize( final String umlsDirPath ) {
+
+   public void setSources( final Collection<String> sources ) {
       _sources.clear();
-      _useSource.clear();
-      _useTarget.clear();
-      File mrConso = new File( umlsDirPath, "MRCONSO.RRF" );
-      if ( !mrConso.exists() ) {
-         mrConso = new File( umlsDirPath + "/META/MRCONSO.RRF" );
-         if ( !mrConso.exists() ) {
-            LOGGER.error( "No MRCONSO.RRF in " + umlsDirPath );
-            return;
-         }
-      }
-      final String mrConsoPath = mrConso.getPath();
-      LOGGER.info( "Parsing source types from " + mrConsoPath );
-      final Collection<String> sources = new HashSet<>();
-      try ( final BufferedReader reader = FileUtil.createReader( mrConsoPath ) ) {
-         int lineCount = 0;
-         List<String> tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
-         while ( tokens != null ) {
-            lineCount++;
-            if ( tokens.size() > MrconsoIndex.SOURCE._index ) {
-               sources.add( tokens.get( MrconsoIndex.SOURCE._index ) );
-            }
-            if ( lineCount % 100000 == 0 ) {
-               LOGGER.info( "File Line " + lineCount + "\t Sources " + sources.size() );
-            }
-            tokens = FileUtil.readBsvTokens( reader, mrConsoPath );
-         }
-         LOGGER.info( "Parsed " + sources.size() + " source types" );
-      } catch ( IOException ioE ) {
-         LOGGER.error( ioE.getMessage() );
-      }
+      _wantedSources.clear();
+      _wantedTargets.clear();
       _sources.addAll( sources );
       Collections.sort( _sources );
-      for ( String source : CTAKES_SOURCES ) {
-         _useSource.put( source, true );
-         _useTarget.put( source, true );
-      }
+      _wantedSources.addAll( Arrays.asList( CTAKES_SOURCES ) );
+      _wantedTargets.addAll( Arrays.asList( CTAKES_SOURCES ) );
       fireTableChanged( new TableModelEvent( this ) );
    }
 
-
    public Collection<String> getWantedSources() {
-      return _useSource.keySet();
+      return _wantedSources;
    }
 
    public Collection<String> getWantedTargets() {
-      return _useTarget.keySet();
+      return _wantedTargets;
    }
 
    /**
@@ -140,13 +106,11 @@ final public class SourceTableModel impl
    }
 
    private boolean isSourceEnabled( final String source ) {
-      final Boolean enabled = _useSource.get( source );
-      return enabled != null && enabled;
+      return _wantedSources.contains( source );
    }
 
    private boolean isTargetEnabled( final String source) {
-      final Boolean enabled = _useTarget.get( source );
-      return enabled != null && enabled;
+      return _wantedTargets.contains( source );
    }
 
    /**
@@ -157,13 +121,29 @@ final public class SourceTableModel impl
       if ( aValue instanceof Boolean ) {
          final String source = _sources.get( rowIndex );
          if ( columnIndex == 0 ) {
-            _useSource.put( source, (Boolean)aValue );
+            selectWantedSource( source, (Boolean)aValue );
          } else if ( columnIndex == 1 ) {
-            _useTarget.put( source, (Boolean)aValue );
+            selectWantedTarget( source, (Boolean)aValue );
          }
       }
    }
 
+   private void selectWantedSource( final String source, final boolean select ) {
+      if ( select ) {
+         _wantedSources.add( source );
+      } else {
+         _wantedSources.remove( source );
+      }
+   }
+
+   private void selectWantedTarget( final String target, final boolean select ) {
+      if ( select ) {
+         _wantedTargets.add( target );
+      } else {
+         _wantedTargets.remove( target );
+      }
+   }
+
    /**
     * {@inheritDoc}
     */

Modified: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java?rev=1719760&r1=1719759&r2=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Tui.java Sun Dec 13 04:27:42 2015
@@ -153,7 +153,9 @@ public enum Tui {
    T059( "Laboratory Procedure" ),
    T063( "Molecular Biology Research Technique" ),
    T062( "Research Activity" ),
-   T061( "Therapeutic or Preventive Procedure" );
+   T061( "Therapeutic or Preventive Procedure" ),
+   // ERROR
+   T999( "Error" );
 
    final private String _description;
    private Tui( final String description ) {
@@ -164,4 +166,20 @@ public enum Tui {
       return _description;
    }
 
+   public int getIntValue() {  // numeric part of the enum name, e.g. T061 gives 61
+      return Integer.parseInt( name().substring( 1 ) );  // name() without its first character, parsed as an int
+   }
+
+//   static public Tui valueOf( final String text ) {
+//
+//
+//
+//      for ( Tui tuiEnum : Tui.values() ) {
+//         if ( tuiEnum.name().equals( text ) ) {
+//            return tuiEnum;
+//         }
+//      }
+//      return Tui.T999;
+//   }
+
 }

Added: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java?rev=1719760&view=auto
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java (added)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiCellRenderer.java Sun Dec 13 04:27:42 2015
@@ -0,0 +1,50 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import javax.swing.*;
+import javax.swing.table.TableCellRenderer;
+import java.awt.*;
+
+
+/**
+ * Cell renderer for the TUI table.  Every cell is rendered by a delegate renderer;
+ * the table model is stored but not read yet.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/11/2015
+ */
+final public class TuiCellRenderer implements TableCellRenderer {
+
+   static private final Logger LOGGER = LogManager.getLogger( "TuiCellRenderer" );
+
+   // stored but not read yet, presumably for rendering that depends on the Tui of a row
+   private final TuiTableModel _tuiModel;
+   // produces the component returned for every cell
+   private final TableCellRenderer _delegate;
+
+   /**
+    * @param tuiModel model of the rendered table
+    * @param delegate renderer that does the actual rendering
+    */
+   public TuiCellRenderer( final TuiTableModel tuiModel, final TableCellRenderer delegate ) {
+      _tuiModel = tuiModel;
+      _delegate = delegate;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @return the component of the delegate renderer, unchanged
+    */
+   @Override
+   public Component getTableCellRendererComponent( final JTable table, final Object value,
+                                           final boolean isSelected, final boolean hasFocus,
+                                           final int row, final int column ) {
+      // TODO (replaces an unfinished commented-out line) look up the Tui of the row in _tuiModel
+      return _delegate.getTableCellRendererComponent( table, value, isSelected, hasFocus, row, column );
+   }
+
+}

Modified: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java?rev=1719760&r1=1719759&r2=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/TuiTableModel.java Sun Dec 13 04:27:42 2015
@@ -6,8 +6,9 @@ import org.apache.logging.log4j.Logger;
 import javax.swing.event.EventListenerList;
 import javax.swing.event.TableModelListener;
 import javax.swing.table.TableModel;
-import java.util.EnumMap;
-import java.util.Map;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.EnumSet;
 
 import static org.apache.ctakes.dictionary.creator.gui.umls.Tui.*;
 
@@ -20,37 +21,30 @@ final public class TuiTableModel impleme
 
    static private final Logger LOGGER = LogManager.getLogger( "TuiTableModel" );
 
-   static private final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
+   static public final Tui[] CTAKES_ANAT = { T021, T022, T023, T024, T025, T026, T029, T030 };
    static private final Tui[] CTAKES_DISO = { T019, T020, T037, T047, T048, T049, T050, T190, T191 };
    static private final Tui[] CTAKES_FIND = { T033, T034, T040, T041, T042, T043, T044, T045, T046, T056, T057, T184 };
    static private final Tui[] CTAKES_PROC = { T059, T060, T061 };
-   static private final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
+   static public final Tui[] CTAKES_DRUG = { T109, T110, T114, T115, T116, T118, T119, T121, T122, T123, T124,
                                               T125, T126, T127, T129, T130, T131, T195, T196, T197, T200, T203 };
 
    static private final String[] COLUMN_NAMES = { "Use", "TUI", "Definition" };
    static private final Class<?>[] COLUMN_CLASSES = { Boolean.class, String.class, String.class };
 
    private final EventListenerList _listenerList = new EventListenerList();
-   private final Map<Tui,Boolean> _usedTuis = new EnumMap<>( Tui.class );
+   private final Collection<Tui> _wantedTuis = EnumSet.noneOf( Tui.class );
 
    public TuiTableModel() {
-      for ( Tui tui : CTAKES_ANAT ) {
-         _usedTuis.put( tui, true );
-      }
-      for ( Tui tui : CTAKES_DISO ) {
-         _usedTuis.put( tui, true );
-      }
-      for ( Tui tui : CTAKES_FIND ) {
-         _usedTuis.put( tui, true );
-      }
-      for ( Tui tui : CTAKES_PROC ) {
-         _usedTuis.put( tui, true );
-      }
-      for ( Tui tui : CTAKES_DRUG ) {
-         _usedTuis.put( tui, true );
-      }
+      _wantedTuis.addAll( Arrays.asList( CTAKES_ANAT ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_DISO ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_FIND ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_PROC ) );
+      _wantedTuis.addAll( Arrays.asList( CTAKES_DRUG ) );
    }
 
+   public Collection<Tui> getWantedTuis() {  // hands out the model's own collection, not a copy
+      return _wantedTuis;
+   }
 
    /**
     * {@inheritDoc}
@@ -107,8 +101,7 @@ final public class TuiTableModel impleme
    }
 
    private boolean isTuiEnabled( final Tui tui ) {
-      final Boolean enabled = _usedTuis.get( tui );
-      return enabled != null && enabled;
+      return _wantedTuis.contains( tui );
    }
 
    /**
@@ -118,7 +111,11 @@ final public class TuiTableModel impleme
    public void setValueAt( final Object aValue, final int rowIndex, final int columnIndex ) {
       if ( aValue instanceof Boolean && columnIndex == 0 ) {
          final Tui tui = Tui.values()[ rowIndex ];
-         _usedTuis.put( tui, (Boolean)aValue );
+         if ( (Boolean)aValue ) {
+            _wantedTuis.add( tui );
+         } else {
+            _wantedTuis.remove( tui );
+         }
       }
    }
 

Copied: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java (from r1719001, ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/UmlsTermUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java?p2=ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java&p1=ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/UmlsTermUtil.java&r1=1719001&r2=1719760&rev=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/UmlsTermUtil.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/UmlsTermUtil.java Sun Dec 13 04:27:42 2015
@@ -1,7 +1,7 @@
-package org.apache.ctakes.dictionarytool.util;
+package org.apache.ctakes.dictionary.creator.gui.umls;
 
-import org.apache.ctakes.dictionarytool.util.token.TextTokenizer;
-import org.apache.ctakes.dictionarytool.util.token.TextTokenizerCtakesPTB;
+import org.apache.ctakes.dictionary.creator.util.FileUtil;
+import org.apache.ctakes.dictionary.creator.util.TextTokenizer;
 
 import java.util.Collection;
 import java.util.Collections;

Added: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java?rev=1719760&view=auto
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java (added)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/gui/umls/Vocabulary.java Sun Dec 13 04:27:42 2015
@@ -0,0 +1,160 @@
+package org.apache.ctakes.dictionary.creator.gui.umls;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Singleton registry that maps a vocabulary name to the java class able to hold its codes.
+ * The class decides the column type given by {@link #getJdbcClass} and the type name given by
+ * {@link #getCtakesClass}.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+public enum Vocabulary {
+   INSTANCE;
+   static public Vocabulary getInstance() {
+      return INSTANCE;
+   }
+
+   // Long.MAX_VALUE has 19 digits, so only codes of at most 18 digits are certain to fit in a long
+   static private final int MAX_LONG_DIGITS = 18;
+
+   private final Logger LOGGER = LogManager.getLogger( "Vocabulary" );
+
+   private final Map<String,Class<?>> _vocabularyClasses = new HashMap<>();
+
+   /**
+    * @return names of all vocabularies added so far, as a view of the internal map keys
+    */
+   public Collection<String> getAllVocabularies() {
+      return _vocabularyClasses.keySet();
+   }
+
+   /**
+    * @param vocabulary name of a vocabulary
+    * @return class holding the codes of the vocabulary, or null if the vocabulary was never added
+    */
+   public Class<?> getVocabularyClass( final String vocabulary ) {
+      return _vocabularyClasses.get( vocabulary );
+   }
+
+   /**
+    * Records the class needed for a code of a vocabulary.
+    * A vocabulary that has needed String for any code stays String for all later codes.
+    *
+    * @param vocabulary name of the vocabulary
+    * @param code       one code of the vocabulary
+    */
+   public void addVocabulary( final String vocabulary, final String code ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return;
+      }
+      _vocabularyClasses.put( vocabulary, getBestClass( code ) );
+   }
+
+   /**
+    * @param vocabulary name of a vocabulary
+    * @return sql column type for the codes of the vocabulary, VARCHAR(48) when none can be derived
+    */
+   public String getJdbcClass( final String vocabulary ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return "VARCHAR(48)";
+      } else if ( Double.class.equals( vocabularyClass ) ) {
+         return "FLOAT";
+      } else if ( Long.class.equals( vocabularyClass ) ) {
+         return "BIGINT";
+      } else if ( Integer.class.equals( vocabularyClass ) ) {
+         return "INTEGER";
+      }
+      LOGGER.error( "Could not derive database class for " + describe( vocabulary, vocabularyClass ) );
+      return "VARCHAR(48)";
+   }
+
+   /**
+    * @param vocabulary name of a vocabulary
+    * @return ctakes type name for the codes of the vocabulary, text when none can be derived
+    */
+   public String getCtakesClass( final String vocabulary ) {
+      final Class<?> vocabularyClass = _vocabularyClasses.get( vocabulary );
+      if ( String.class.equals( vocabularyClass ) ) {
+         return "text";
+      } else if ( Double.class.equals( vocabularyClass ) ) {
+         return "double";
+      } else if ( Long.class.equals( vocabularyClass ) ) {
+         return "long";
+      } else if ( Integer.class.equals( vocabularyClass ) ) {
+         return "int";
+      }
+      LOGGER.error( "Could not derive ctakes class for " + describe( vocabulary, vocabularyClass ) );
+      return "text";
+   }
+
+   /**
+    * Null-safe text for the error logs: a vocabulary that was never added has no class to name.
+    */
+   static private String describe( final String vocabulary, final Class<?> vocabularyClass ) {
+      if ( vocabularyClass == null ) {
+         return "unknown vocabulary " + vocabulary;
+      }
+      return vocabularyClass.getName();
+   }
+
+   /**
+    * @param code         code of a vocabulary
+    * @param currentClass class already chosen for the vocabulary, may be null
+    * @return String for any code that is not a plain number, otherwise the narrowest of
+    * Double, Long and Integer that is no narrower than currentClass
+    */
+   static private Class<?> getBestClassFuture( final String code, final Class<?> currentClass ) {
+      boolean haveDot = false;
+      boolean haveDigit = false;
+      for ( char c : code.toCharArray() ) {
+         if ( Character.isDigit( c ) ) {
+            haveDigit = true;
+         } else if ( c == '.' && !haveDot ) {
+            // the first decimal point is allowed, a second one ends up in the String branch
+            haveDot = true;
+         } else {
+            return String.class;
+         }
+      }
+      if ( !haveDigit ) {
+         // empty, or nothing but a decimal point
+         return String.class;
+      }
+      if ( haveDot || Double.class.equals( currentClass ) ) {
+         return Double.class;
+      }
+      if ( code.length() > 9 || Long.class.equals( currentClass ) ) {
+         return Long.class;
+      }
+      return Integer.class;
+   }
+
+   // TODO replace with getBestClassFuture when ctakes is upgraded to accept double and int
+   /**
+    * @param code code of a vocabulary
+    * @return Long for a code of 1 to 18 digits, otherwise String
+    */
+   static private Class<?> getBestClass( final String code ) {
+      if ( code.isEmpty() || code.length() > MAX_LONG_DIGITS ) {
+         // not certain to parse as a long
+         return String.class;
+      }
+      for ( char c : code.toCharArray() ) {
+         if ( !Character.isDigit( c ) ) {
+            return String.class;
+         }
+      }
+      return Long.class;
+   }
+
+}

Added: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java?rev=1719760&view=auto
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java (added)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/HsqlUtil.java Sun Dec 13 04:27:42 2015
@@ -0,0 +1,144 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+
+import org.apache.ctakes.dictionary.creator.gui.umls.Vocabulary;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.*;
+
+/**
+ * Writes the files of a new hsqldb file database for a dictionary:
+ * the .properties file, the .script file holding the table definitions, and an .rc connection file.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+final public class HsqlUtil {
+
+   static private final Logger LOGGER = LogManager.getLogger( "HsqlUtil" );
+
+   static public final String URL_PREFIX = "jdbc:hsqldb:file:";
+
+   private HsqlUtil() {
+   }
+
+
+   /**
+    * Creates the directory databasePath/databaseName and writes the database files into it.
+    *
+    * @param databasePath parent directory of the database directory
+    * @param databaseName name of the database directory and base name of its files
+    * @return true if the directory exists or was created and all three files were written
+    */
+   static public boolean createDatabase( final String databasePath, final String databaseName ) {
+      final File databaseDir = new File( databasePath, databaseName );
+      if ( databaseDir.isFile() ) {
+         LOGGER.error( databaseDir.getPath() + " exists as a file.  Hsqldb requires that path to be a directory" );
+         return false;
+      }
+      // mkdirs() is also false for a directory that already exists, which is not a failure
+      if ( !databaseDir.mkdirs() && !databaseDir.isDirectory() ) {
+         LOGGER.error( "Could not create directory " + databaseDir.getPath() );
+         return false;
+      }
+      return writePropertiesFile( databaseDir, databaseName )
+             && writeScriptFile( databaseDir, databaseName )
+             && writeRcFile( databaseDir, databaseName );
+   }
+
+   /**
+    * Writes databaseName.properties with a fixed set of hsqldb settings.
+    *
+    * @return false if the file could not be written
+    */
+   static private boolean writePropertiesFile( final File databaseDir, final String databaseName ) {
+      final File propertiesFile = new File( databaseDir, databaseName + ".properties" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( propertiesFile ) ) ) {
+         writer.write( "#HSQL Database Engine 1.8.0.10\n" );
+         writer.write( "#Thu Sep 04 09:49:09 EDT 2014\n" );
+         writer.write( "hsqldb.script_format=0\n" );
+         writer.write( "runtime.gc_interval=0\n" );
+         writer.write( "sql.enforce_strict_size=false\n" );
+         writer.write( "hsqldb.cache_size_scale=8\n" );
+         writer.write( "readonly=false\n" );
+         writer.write( "hsqldb.nio_data_file=true\n" );
+         writer.write( "hsqldb.cache_scale=14\n" );
+         writer.write( "version=1.8.0\n" );
+         writer.write( "hsqldb.default_table_type=memory\n" );
+         writer.write( "hsqldb.cache_file_scale=1\n" );
+         writer.write( "hsqldb.log_size=200\n" );
+         writer.write( "modified=no\n" );
+         writer.write( "hsqldb.cache_version=1.7.0\n" );
+         writer.write( "hsqldb.original_version=1.8.0\n" );
+         writer.write( "hsqldb.compatible_version=1.8.0\n\n" );
+      } catch ( IOException ioE ) {
+         // pass the exception along so that the log keeps the stack trace
+         LOGGER.error( ioE.getMessage(), ioE );
+         return false;
+      }
+      return true;
+   }
+
+   /**
+    * Writes databaseName.script with the CUI_TERMS, TUI and PREFTERM tables
+    * plus one code table for each vocabulary known to {@link Vocabulary}.
+    *
+    * @return false if the file could not be written
+    */
+   static private boolean writeScriptFile( final File databaseDir, final String databaseName ) {
+      final File scriptFile = new File( databaseDir, databaseName + ".script" );
+      try ( final Writer writer = new BufferedWriter( new FileWriter( scriptFile ) ) ) {
+         writer.write( "CREATE SCHEMA PUBLIC AUTHORIZATION DBA\n" );
+         // main table
+         writer.write( "CREATE MEMORY TABLE CUI_TERMS(CUI BIGINT,RINDEX INTEGER,TCOUNT INTEGER,TEXT VARCHAR(255),RWORD VARCHAR(48))\n" );
+         writer.write( "CREATE INDEX IDX_CUI_TERMS ON CUI_TERMS(RWORD)\n" );
+         // tui table
+         writer.write( "CREATE MEMORY TABLE TUI(CUI BIGINT,TUI INTEGER)\n" );
+         writer.write( "CREATE INDEX IDX_TUI ON TUI(CUI)\n" );
+         // preferred term table
+         writer.write( "CREATE MEMORY TABLE PREFTERM(CUI BIGINT,PREFTERM VARCHAR(255))\n" );
+         writer.write( "CREATE INDEX IDX_PREFTERM ON PREFTERM(CUI)\n" );
+         // vocabulary tables
+         for ( String vocabulary : Vocabulary.getInstance().getAllVocabularies() ) {
+            final String jdbcClass = Vocabulary.getInstance().getJdbcClass( vocabulary );
+            // the table and its code column both get the vocabulary name with dots replaced by underscores
+            final String tableName = vocabulary.replace( '.','_' );
+            writer.write( "CREATE MEMORY TABLE " + tableName + "(CUI BIGINT," + tableName + " " + jdbcClass + ")\n" );
+            writer.write( "CREATE INDEX IDX_" + tableName + " ON " + tableName + "(CUI)\n" );
+         }
+         writer.write( "CREATE USER SA PASSWORD \"\"\n" );
+         writer.write( "GRANT DBA TO SA\n" );
+         writer.write( "SET WRITE_DELAY 10\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage(), ioE );
+         return false;
+      }
+      return true;
+   }
+
+   /**
+    * Writes databaseName.rc with the connection url, the user name and a password line without a value.
+    *
+    * @return false if the file could not be written
+    */
+   static private boolean writeRcFile( final File databaseDir, final String databaseName ) {
+      final File rcFile = new File( databaseDir, databaseName + ".rc" );
+      // backslashes in the directory path are written as forward slashes in the url
+      final String url = HsqlUtil.URL_PREFIX + databaseDir.getPath().replace( '\\', '/' )
+                         + "/" + databaseName;
+      try ( final Writer writer = new BufferedWriter( new FileWriter( rcFile ) ) ) {
+         writer.write( "urlid " + databaseName + "\n" );
+         writer.write( "url " + url + ";shutdown=true\n" );
+         writer.write( "username sa\n" );
+         writer.write( "password\n" );
+      } catch ( IOException ioE ) {
+         LOGGER.error( ioE.getMessage(), ioE );
+         return false;
+      }
+      return true;
+   }
+
+
+}

Copied: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java (from r1719001, ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/JdbcUtil.java)
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java?p2=ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java&p1=ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/JdbcUtil.java&r1=1719001&r2=1719760&rev=1719760&view=diff
==============================================================================
--- ctakes/sandbox/dictionarytool/src/org/apache/ctakes/dictionarytool/util/JdbcUtil.java (original)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/JdbcUtil.java Sun Dec 13 04:27:42 2015
@@ -1,4 +1,7 @@
-package org.apache.ctakes.dictionarytool.util;
+package org.apache.ctakes.dictionary.creator.util;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 
 import java.sql.Connection;
 import java.sql.Driver;
@@ -12,6 +15,8 @@ import java.sql.SQLException;
  */
 final public class JdbcUtil {
 
+   static private final Logger LOGGER = LogManager.getLogger( "JdbcUtil" );
+   
    private JdbcUtil() {
    }
 
@@ -24,22 +29,22 @@ final public class JdbcUtil {
          DriverManager.registerDriver( driver );
       } catch ( Exception e ) {
          // TODO At least four different exceptions are thrown here, and should be caught and handled individually
-         System.err.println( "Could not register Driver " + JDBC_DRIVER );
-         System.err.println( e.getMessage() );
+         LOGGER.error( "Could not register Driver " + JDBC_DRIVER );
+         LOGGER.error( e.getMessage() );
          System.exit( 1 );
       }
    }
 
    static public Connection createDatabaseConnection( final String url, final String user, final String pass ) {
       registerDriver();
-      System.out.println( "Connecting to " + url + " as " + user );
+      LOGGER.info( "Connecting to " + url + " as " + user );
       Connection connection = null;
       try {
          connection = DriverManager.getConnection( url, user, pass );
       } catch ( SQLException sqlE ) {
          // thrown by Connection.prepareStatement(..) and getTotalRowCount(..)
-         System.err.println( "Could not establish connection to " + url + " as " + user );
-         System.err.println( sqlE.getMessage() );
+         LOGGER.error( "Could not establish connection to " + url + " as " + user );
+         LOGGER.error( sqlE.getMessage() );
          System.exit( 1 );
       }
       return connection;
@@ -56,6 +61,10 @@ final public class JdbcUtil {
       return createRowInsertSql( tableName, fieldNames );
    }
 
+   static public String createCodeInsertSql( final String vocabulary ) {  // column named as HsqlUtil.writeScriptFile creates it: dots replaced
+      return createRowInsertSql( vocabulary.toLowerCase( java.util.Locale.ROOT ).replace( '.','_' ), "CUI", vocabulary.replace( '.','_' ) );
+   }
+
    static public String createRowInsertSql( final String tableName, final String... fieldNames ) {
       final StringBuilder sb = new StringBuilder( "insert into" );
       sb.append( " " ).append( tableName );

Added: ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java?rev=1719760&view=auto
==============================================================================
--- ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java (added)
+++ ctakes/sandbox/dictionary-gui/src/main/java/org/apache/ctakes/dictionary/creator/util/LambdaUtil.java Sun Dec 13 04:27:42 2015
@@ -0,0 +1,31 @@
+package org.apache.ctakes.dictionary.creator.util;
+
+
+import java.util.function.BinaryOperator;
+import java.util.function.Function;
+
+/**
+ * Holder of small reusable functions and operators.
+ *
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 12/12/2015
+ */
+final public class LambdaUtil {
+
+   private LambdaUtil() {
+   }
+
+   // returns its argument
+   static public final Function<String, String> asSelf = Function.identity();
+
+   // ignore the argument and return a constant
+   static public final Function<String, Integer> zeroInt = ignored -> 0;
+   static public final Function<String, Long> zeroLong = ignored -> 0L;
+   static public final Function<String, Integer> one = ignored -> 1;
+
+   // add the two arguments
+   static public final BinaryOperator<Integer> sumInt = Integer::sum;
+   static public final BinaryOperator<Long> sumLong = Long::sum;
+
+}



Mime
View raw message