ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1624032 [3/3] - in /ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2: ae/ concept/ consumer/ dictionary/ relation/ term/ textspan/ util/ util/collection/
Date Wed, 10 Sep 2014 15:27:25 GMT
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordDictionary.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordDictionary.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordDictionary.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordDictionary.java Wed Sep 10 15:27:24 2014
@@ -18,8 +18,8 @@
  */
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
-import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
 import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
 
 import java.util.Collection;
 
@@ -34,18 +34,15 @@ public interface RareWordDictionary {
    /**
     * The Type identifier and Name are used to maintain a collection of dictionaries,
     * so the combination of Type and Name should be unique for each dictionary if possible.
+    *
     * @return simple name for the dictionary
     */
    public String getName();
 
    /**
-    * @return the type of term that exists in the dictionary: Anatomical Site, Disease/Disorder, Drug, etc.
-    */
-   public String getSemanticGroup();
-
-   /**
     * Any single token can exist in zero or more terms in the dictionary.  It may exist as its -own- form or as an
     * alternate canonical variant.  This method will check the dictionary for both
+    *
     * @param fastLookupToken a single-word token
     * @return zero or more terms that contain the lookup token
     */

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/RareWordTermMapCreator.java Wed Sep 10 15:27:24 2014
@@ -19,21 +19,21 @@
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
 import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.CuiCodeUtil;
 import org.apache.ctakes.dictionary.lookup2.util.LookupUtil;
+import org.apache.ctakes.dictionary.lookup2.util.TuiCodeUtil;
+import org.apache.ctakes.dictionary.lookup2.util.collection.ArrayListMap;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.logging.Logger;
 
 /**
- * Given a collection of {@link CuiTuiTerm} Objects,
+ * Given a collection of {@link org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTerm} Objects,
  * this factory can create a Map of {@link org.apache.ctakes.dictionary.lookup2.term.RareWordTerm} collections
  * indexed by rare word.
  * This map can be used to create a {@link MemRareWordDictionary}
- *
+ * <p/>
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 1/9/14
@@ -42,9 +42,10 @@ final public class RareWordTermMapCreato
 
    static private final Logger LOGGER = Logger.getLogger( "RareWordTermMapCreator" );
 
-   private RareWordTermMapCreator() {}
+   private RareWordTermMapCreator() {
+   }
 
-   static private final String [] PREFIXES = {
+   static private final String[] PREFIXES = {
          "e-",
          "a-",
          "u-",
@@ -100,8 +101,8 @@ final public class RareWordTermMapCreato
          "ortho-",
          "phospho-",
    };
-   static private final String [] SUFFIXES = {"-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most",
-                                              "-o-torium", "-rama", "-wise"};
+   static private final String[] SUFFIXES = { "-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most",
+                                              "-o-torium", "-rama", "-wise" };
 
    // LookupDesc for the standard excluded pos tags are
    //   VB,VBD,VBG,VBN,VBP,VBZ,CC,CD,DT,EX,LS,MD,PDT,POS,PP,PP$,PRP,PRP$,RP,TO,WDT,WP,WPS,WRB
@@ -146,32 +147,27 @@ final public class RareWordTermMapCreato
          "how", "where", "when", "however", "wherever", "whenever",
    };
 
-   static public Map<String,Collection<RareWordTerm>> createRareWordTermMap( final Collection<CuiTuiTerm> cuiTuiTerms ) {
-      final Map<String,Collection<RareWordTerm>> rareWordTermMap = new HashMap<String,Collection<RareWordTerm>>();
-      final Map<String,Integer> tokenCountMap = createTokenCountMap( cuiTuiTerms );
-      for ( CuiTuiTerm cuiTuiTerm : cuiTuiTerms ) {
-         final String rareWord = getRareWord( cuiTuiTerm.getTerm(), tokenCountMap );
-         final int wordIndex = getWordIndex( cuiTuiTerm.getTerm(), rareWord );
-         final int tokenCount = getTokenCount( cuiTuiTerm.getTerm() );
+   static public CollectionMap<String, RareWordTerm> createRareWordTermMap( final Collection<CuiTerm> cuiTerms ) {
+      final CollectionMap<String, RareWordTerm> rareWordTermMap = new ArrayListMap<>();
+      final Map<String, Integer> tokenCountMap = createTokenCountMap( cuiTerms );
+      for ( CuiTerm cuiTerm : cuiTerms ) {
+         final String rareWord = getRareWord( cuiTerm.getTerm(), tokenCountMap );
+         final int wordIndex = getWordIndex( cuiTerm.getTerm(), rareWord );
+         final int tokenCount = getTokenCount( cuiTerm.getTerm() );
          if ( wordIndex < 0 ) {
-            LOGGER.warning( "Bad Rare Word Index for " + rareWord + " in " + cuiTuiTerm.getTerm() );
+            LOGGER.warning( "Bad Rare Word Index for " + rareWord + " in " + cuiTerm.getTerm() );
             continue;
          }
-         Collection<RareWordTerm> rareWordTerms = rareWordTermMap.get( rareWord );
-         if ( rareWordTerms == null ) {
-            rareWordTerms = new ArrayList<RareWordTerm>();
-            rareWordTermMap.put( rareWord, rareWordTerms );
-         }
-         rareWordTerms.add( new RareWordTerm( cuiTuiTerm.getTerm(), cuiTuiTerm.__cui, cuiTuiTerm.__tui,
-                                              rareWord, wordIndex, tokenCount ) );
+         rareWordTermMap.placeValue( rareWord, new RareWordTerm( cuiTerm.getTerm(), cuiTerm.__cui,
+               rareWord, wordIndex, tokenCount ) );
       }
       return rareWordTermMap;
    }
 
-   static private Map<String,Integer> createTokenCountMap( final Collection<CuiTuiTerm> cuiTuiTerms ) {
-      final Map<String,Integer> tokenCountMap = new HashMap<String, Integer>();
-      for ( CuiTuiTerm cuiTuiTerm : cuiTuiTerms ) {
-         final String[] tokens = LookupUtil.fastSplit( cuiTuiTerm.getTerm(), ' ' );
+   static private Map<String, Integer> createTokenCountMap( final Collection<CuiTerm> cuiTerms ) {
+      final Map<String, Integer> tokenCountMap = new HashMap<>();
+      for ( CuiTerm cuiTerm : cuiTerms ) {
+         final String[] tokens = LookupUtil.fastSplit( cuiTerm.getTerm(), ' ' );
          for ( String token : tokens ) {
             if ( isRarableToken( token ) ) {
                // Don't bother to store counts for single-character tokens
@@ -179,14 +175,14 @@ final public class RareWordTermMapCreato
                if ( count == null ) {
                   count = 0;
                }
-               tokenCountMap.put( token, (count+1) );
+               tokenCountMap.put( token, (count + 1) );
             }
          }
       }
       return tokenCountMap;
    }
 
-   static private String getRareWord( final String tokenizedTerm, final Map<String,Integer> tokenCountMap ) {
+   static private String getRareWord( final String tokenizedTerm, final Map<String, Integer> tokenCountMap ) {
       final String[] tokens = LookupUtil.fastSplit( tokenizedTerm, ' ' );
       if ( tokens.length == 1 ) {
          return tokens[0];
@@ -210,7 +206,7 @@ final public class RareWordTermMapCreato
          return false;
       }
       boolean hasLetter = false;
-      for ( int i=0; i<token.length(); i++ ) {
+      for ( int i = 0; i < token.length(); i++ ) {
          if ( Character.isLetter( token.charAt( i ) ) ) {
             hasLetter = true;
             break;
@@ -244,7 +240,6 @@ final public class RareWordTermMapCreato
    }
 
 
-
    // Can also use:
    // tokenizer = new TokenizerPTB();  List<Token> tokenList = tokenizer.tokenize( term );
    // for( token ) {
@@ -270,15 +265,15 @@ final public class RareWordTermMapCreato
          }
       }
       // trim whitespace
-      sb.setLength( Math.max( 0, sb.length()-1 ) );
+      sb.setLength( Math.max( 0, sb.length() - 1 ) );
       return sb.toString();
    }
 
    static private List<String> getTokens( final String word ) {
-      final List<String> tokens = new ArrayList<String>();
+      final List<String> tokens = new ArrayList<>();
       final StringBuilder sb = new StringBuilder();
       final int count = word.length();
-      for ( int i=0; i<count; i++ ) {
+      for ( int i = 0; i < count; i++ ) {
          final char c = word.charAt( i );
          if ( Character.isLetterOrDigit( c ) ) {
             sb.append( c );
@@ -289,7 +284,7 @@ final public class RareWordTermMapCreato
                tokens.add( sb.toString() );
                sb.setLength( 0 );
             }
-            tokens.add( ""+c );
+            tokens.add( "" + c );
             continue;
          }
          final boolean isPrefix = isPrefix( sb.toString() );
@@ -298,7 +293,7 @@ final public class RareWordTermMapCreato
             sb.append( '-' );
             continue;
          }
-         final boolean isSuffix = isSuffix( word, i+1 );
+         final boolean isSuffix = isSuffix( word, i + 1 );
          if ( isSuffix ) {
             // what follows is a suffix, so append the dash and move on
             sb.append( '-' );
@@ -308,7 +303,7 @@ final public class RareWordTermMapCreato
             tokens.add( sb.toString() );
             sb.setLength( 0 );
          }
-         tokens.add( ""+c );
+         tokens.add( "" + c );
       }
       if ( sb.length() != 0 ) {
          tokens.add( sb.toString() );
@@ -346,7 +341,7 @@ final public class RareWordTermMapCreato
    static private String getNextCharTerm( final String word ) {
       final StringBuilder sb = new StringBuilder();
       final int count = word.length();
-      for ( int i=0; i<count; i++ ) {
+      for ( int i = 0; i < count; i++ ) {
          final char c = word.charAt( i );
          if ( !Character.isLetterOrDigit( c ) ) {
             return sb.toString();
@@ -357,32 +352,32 @@ final public class RareWordTermMapCreato
    }
 
 
-   static public class CuiTuiTerm {
+   static public class CuiTerm {
+
       final private String __term;
-      final private String __cui;
-      final private String __tui;
+      final private Long __cui;
       final private int __hashcode;
-      public CuiTuiTerm( final String cui, final String tui, final String term ) {
+
+      public CuiTerm( final String cui, final String term ) {
          __term = getTokenizedTerm( term );
-         __cui = cui.startsWith( "C" ) ? cui : "C"+cui;
-         __tui = tui.startsWith( "T" ) ? tui : "T"+tui;
-         __hashcode = (__cui+"_"+__tui+"_"+__term).hashCode();
+         __cui = CuiCodeUtil.getCuiCode( cui );
+         __hashcode = (__cui + "_" + __term).hashCode();
       }
-      public String getCui() {
+
+      public Long getCui() {
          return __cui;
       }
-      public String getTui() {
-         return __tui;
-      }
+
       public String getTerm() {
          return __term;
       }
+
       public boolean equals( final Object value ) {
-         return value instanceof CuiTuiTerm
-               && __term.equals( ((CuiTuiTerm)value).__term )
-               && __cui.equals( ((CuiTuiTerm)value).__cui )
-               && __tui.equals( ((CuiTuiTerm)value).__tui );
+         return value instanceof CuiTerm
+                && __term.equals( ((CuiTerm)value).__term )
+                && __cui.equals( ((CuiTerm)value).__cui );
       }
+
       public int hashCode() {
          return __hashcode;
       }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/relation/CuiRelationsJdbc.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/relation/CuiRelationsJdbc.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/relation/CuiRelationsJdbc.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/relation/CuiRelationsJdbc.java Wed Sep 10 15:27:24 2014
@@ -1,110 +1,110 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.dictionary.lookup2.relation;
-
-import org.apache.log4j.Logger;
-
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-
-import static org.apache.ctakes.dictionary.lookup2.consumer.WsdTermConsumer.RelatedCui;
-
-/**
- * TODO  -- work in progress for use in WSD
- *
- *
- *
-* Author: SPF
-* Affiliation: CHIP-NLP
-* Date: 12/16/13
-*/
-public class CuiRelationsJdbc {
-
-   /**
-    * Column (field) indices in the database.  Notice that these are constant and not configurable.
-    * If a configurable implementation is desired then create an extension.
-    */
-   static private enum FIELD_INDEX {
-      CUI( 1 ), RELATION_TYPE( 2 );
-      final private int __index;
-      private FIELD_INDEX( final int index ) {
-         __index = index;
-      }
-   }
-
-   // LOG4J logger based on class name
-   final private Logger _logger = Logger.getLogger( getClass().getName() );
-
-   final private Connection _connection;
-   final private String _tableName;
-   private PreparedStatement _metadataStatement;
-
-   /**
-    *
-    */
-   public CuiRelationsJdbc( final Connection connection, final String tableName ) {
-      _connection = connection;
-      _tableName = tableName;
-   }
-
-   /**
-    * @param cui cui to check for relations
-    * @return all relations (cui and relation)
-    */
-   public Collection<RelatedCui> getCuiRelations( final String cui ) {
-      final List<RelatedCui> relatedCuis = new ArrayList<RelatedCui>();
-      try {
-         initMetaDataStatement( cui );
-         final ResultSet resultSet = _metadataStatement.executeQuery();
-         while ( resultSet.next() ) {
-            final RelatedCui relatedCui = new RelatedCui( resultSet.getString( FIELD_INDEX.CUI.__index),
-                                                          resultSet.getString( FIELD_INDEX.RELATION_TYPE.__index ) );
-            relatedCuis.add( relatedCui );
-         }
-         // Though the ResultSet interface documentation states that there are automatic closures,
-         // it is up to the driver to implement this behavior ...  historically some drivers have not done so
-         resultSet.close();
-      } catch ( SQLException e ) {
-         _logger.error( e.getMessage() );
-      }
-      return relatedCuis;
-   }
-
-   /**
-    *
-    * @param cui text of the rare word to use for term lookup
-    * @return an sql call to use for term lookup
-    * @throws SQLException if the {@code PreparedStatement} could not be created or changed
-    */
-   private PreparedStatement initMetaDataStatement( final String cui ) throws SQLException {
-      if ( _metadataStatement == null ) {
-         final String lookupSql = "SELECT * FROM " + _tableName + " WHERE RWORD = ?";
-         _metadataStatement = _connection.prepareStatement( lookupSql );
-      }
-      _metadataStatement.clearParameters();
-      _metadataStatement.setString( 1, cui );
-      return _metadataStatement;
-   }
-
-}
+///**
+// * Licensed to the Apache Software Foundation (ASF) under one
+// * or more contributor license agreements.  See the NOTICE file
+// * distributed with this work for additional information
+// * regarding copyright ownership.  The ASF licenses this file
+// * to you under the Apache License, Version 2.0 (the
+// * "License"); you may not use this file except in compliance
+// * with the License.  You may obtain a copy of the License at
+// *
+// *   http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing,
+// * software distributed under the License is distributed on an
+// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// * KIND, either express or implied.  See the License for the
+// * specific language governing permissions and limitations
+// * under the License.
+// */
+//package org.apache.ctakes.dictionary.lookup2.relation;
+//
+//import org.apache.log4j.Logger;
+//
+//import java.sql.Connection;
+//import java.sql.PreparedStatement;
+//import java.sql.ResultSet;
+//import java.sql.SQLException;
+//import java.util.ArrayList;
+//import java.util.Collection;
+//import java.util.List;
+//
+//import static org.apache.ctakes.dictionary.lookup2.consumer.WsdTermConsumer.RelatedCui;
+//
+///**
+// * TODO  -- work in progress for use in WSD
+// *
+// *
+// *
+//* Author: SPF
+//* Affiliation: CHIP-NLP
+//* Date: 12/16/13
+//*/
+//public class CuiRelationsJdbc {
+//
+//   /**
+//    * Column (field) indices in the database.  Notice that these are constant and not configurable.
+//    * If a configurable implementation is desired then create an extension.
+//    */
+//   static private enum FIELD_INDEX {
+//      CUI( 1 ), RELATION_TYPE( 2 );
+//      final private int __index;
+//      private FIELD_INDEX( final int index ) {
+//         __index = index;
+//      }
+//   }
+//
+//   // LOG4J logger based on class name
+//   final private Logger _logger = Logger.getLogger( getClass().getName() );
+//
+//   final private Connection _connection;
+//   final private String _tableName;
+//   private PreparedStatement _metadataStatement;
+//
+//   /**
+//    *
+//    */
+//   public CuiRelationsJdbc( final Connection connection, final String tableName ) {
+//      _connection = connection;
+//      _tableName = tableName;
+//   }
+//
+//   /**
+//    * @param cui cui to check for relations
+//    * @return all relations (cui and relation)
+//    */
+//   public Collection<RelatedCui> getCuiRelations( final String cui ) {
+//      final List<RelatedCui> relatedCuis = new ArrayList<RelatedCui>();
+//      try {
+//         initMetaDataStatement( cui );
+//         final ResultSet resultSet = _metadataStatement.executeQuery();
+//         while ( resultSet.next() ) {
+//            final RelatedCui relatedCui = new RelatedCui( resultSet.getString( FIELD_INDEX.CUI.__index),
+//                                                          resultSet.getString( FIELD_INDEX.RELATION_TYPE.__index ) );
+//            relatedCuis.add( relatedCui );
+//         }
+//         // Though the ResultSet interface documentation states that there are automatic closures,
+//         // it is up to the driver to implement this behavior ...  historically some drivers have not done so
+//         resultSet.close();
+//      } catch ( SQLException e ) {
+//         _logger.error( e.getMessage() );
+//      }
+//      return relatedCuis;
+//   }
+//
+//   /**
+//    *
+//    * @param cui text of the rare word to use for term lookup
+//    * @return an sql call to use for term lookup
+//    * @throws SQLException if the {@code PreparedStatement} could not be created or changed
+//    */
+//   private PreparedStatement initMetaDataStatement( final String cui ) throws SQLException {
+//      if ( _metadataStatement == null ) {
+//         final String lookupSql = "SELECT * FROM " + _tableName + " WHERE RWORD = ?";
+//         _metadataStatement = _connection.prepareStatement( lookupSql );
+//      }
+//      _metadataStatement.clearParameters();
+//      _metadataStatement.setString( 1, cui );
+//      return _metadataStatement;
+//   }
+//
+//}

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/RareWordTerm.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/RareWordTerm.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/RareWordTerm.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/RareWordTerm.java Wed Sep 10 15:27:24 2014
@@ -30,36 +30,31 @@ import javax.annotation.concurrent.Immut
 final public class RareWordTerm {
 
    final private String _text;
-   final private String _cui;
-   final private String _tui;
+   final private Long _cuiCode;
    final private String _rareWord;
    final private int _rareWordIndex;
    final private int _tokenCount;
    final private int _hashCode;
 
    /**
-    *
-    * @param text full text of term
-    * @param cui  umls cui for the term
-    * @param tui  semantic type tui for the term
-    * @param rareWord rare word in the term that is used for lookup
+    * @param text          full text of term
+    * @param cuiCode           umls cui for the term
+    * @param rareWord      rare word in the term that is used for lookup
     * @param rareWordIndex index of the rare word within the term
-    * @param tokenCount number of tokens within the term
+    * @param tokenCount    number of tokens within the term
     */
-   public RareWordTerm( final String text, final String cui, final String tui,
+   public RareWordTerm( final String text, final Long cuiCode,
                         final String rareWord, final int rareWordIndex,
                         final int tokenCount ) {
       _text = text;
-      _cui = cui;
-      _tui = tui;
+      _cuiCode = cuiCode;
       _rareWord = rareWord;
       _rareWordIndex = rareWordIndex;
       _tokenCount = tokenCount;
-      _hashCode = (_cui+_tui+ _text).hashCode();
+      _hashCode = (_cuiCode + _text).hashCode();
    }
 
    /**
-    *
     * @return full text of term
     */
    public String getText() {
@@ -67,23 +62,13 @@ final public class RareWordTerm {
    }
 
    /**
-    *
     * @return umls cui for the term
     */
-   public String getCui() {
-      return _cui;
+   public Long getCuiCode() {
+      return _cuiCode;
    }
 
    /**
-    *
-    * @return semantic type tui for the term
-    */
-   public String getTui() {
-      return _tui;
-   }
-
-   /**
-    *
     * @return rare word in the term that is used for lookup
     */
    public String getRareWord() {
@@ -91,7 +76,6 @@ final public class RareWordTerm {
    }
 
    /**
-    *
     * @return index of the rare word within the term
     */
    public int getRareWordIndex() {
@@ -99,7 +83,6 @@ final public class RareWordTerm {
    }
 
    /**
-    *
     * @return number of tokens within the term
     */
    public int getTokenCount() {
@@ -111,11 +94,11 @@ final public class RareWordTerm {
     */
    @Override
    public boolean equals( final Object value ) {
-      if ( !( value instanceof RareWordTerm) ) {
+      if ( !(value instanceof RareWordTerm) ) {
          return false;
       }
       final RareWordTerm other = (RareWordTerm)value;
-      return other.getCui().equals( _cui ) && other.getText().equals( _text ) && other.getTui().equals( _tui );
+      return other.getCuiCode().equals( _cuiCode ) && other.getText().equals( _text );
    }
 
    /**

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/SpannedRareWordTerm.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/SpannedRareWordTerm.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/SpannedRareWordTerm.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/term/SpannedRareWordTerm.java Wed Sep 10 15:27:24 2014
@@ -29,6 +29,7 @@ import javax.annotation.concurrent.Immut
  * Affiliation: CHIP-NLP
  * Date: 11/18/13
  */
+// TODO No longer used - remove
 @Immutable
 final public class SpannedRareWordTerm {
 
@@ -37,19 +38,17 @@ final public class SpannedRareWordTerm {
    final private int _hashCode;
 
    /**
-    *
     * @param rareWordTerm contains a term from a {@link org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary}
-    * @param startOffset the start index of the term
-    * @param endOffset the end index of the term
+    * @param startOffset  the start index of the term
+    * @param endOffset    the end index of the term
     */
    public SpannedRareWordTerm( final RareWordTerm rareWordTerm, final int startOffset, final int endOffset ) {
       this( rareWordTerm, new DefaultTextSpan( startOffset, endOffset ) );
    }
 
    /**
-    *
     * @param rareWordTerm contains a term from a {@link org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary}
-    * @param spanKey the span of the term
+    * @param spanKey      the span of the term
     */
    public SpannedRareWordTerm( final RareWordTerm rareWordTerm, final TextSpan spanKey ) {
       _rareWordTerm = rareWordTerm;
@@ -65,7 +64,6 @@ final public class SpannedRareWordTerm {
    }
 
    /**
-    *
     * @return the term that was discovered in this span
     */
    public RareWordTerm getRareWordTerm() {
@@ -80,7 +78,7 @@ final public class SpannedRareWordTerm {
    @Override
    public boolean equals( final Object value ) {
       if ( value instanceof SpannedRareWordTerm ) {
-         final SpannedRareWordTerm other = (SpannedRareWordTerm) value;
+         final SpannedRareWordTerm other = (SpannedRareWordTerm)value;
          return _textSpan.equals( other._textSpan ) && _rareWordTerm.equals( other.getRareWordTerm() );
       }
       return false;

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/DefaultTextSpan.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/DefaultTextSpan.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/DefaultTextSpan.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/DefaultTextSpan.java Wed Sep 10 15:27:24 2014
@@ -28,14 +28,16 @@ import javax.annotation.concurrent.Immut
  */
 @Immutable
 final public class DefaultTextSpan implements TextSpan {
+
    final private int _start;
    final private int _end;
    final private int _hashCode;
 
    /**
     * Given span indices should be ordered start < end, but it is not an absolute requirement.
+    *
     * @param start start index of a span, be it of a string or other
-    * @param end end index of a span,  be it of a  string or other
+    * @param end   end index of a span,  be it of a  string or other
     */
    public DefaultTextSpan( final int start, final int end ) {
       _start = start;
@@ -61,6 +63,15 @@ final public class DefaultTextSpan imple
 
    /**
     * {@inheritDoc}
+    */
+   @Override
+   public int getLength() {
+      return _end - _start + 1;
+   }
+
+   /**
+    * {@inheritDoc}
+    *
     * @return a hashcode based upon the start and end indices of this span key
     */
    @Override
@@ -70,17 +81,19 @@ final public class DefaultTextSpan imple
 
    /**
     * {@inheritDoc}
+    *
     * @return true iff the start keys are equal and the end keys are equal
     */
    @Override
    public boolean equals( final Object object ) {
       return object instanceof DefaultTextSpan
-            && _start == ((DefaultTextSpan)object)._start
-            && _end == ((DefaultTextSpan)object)._end;
+             && _start == ((DefaultTextSpan)object)._start
+             && _end == ((DefaultTextSpan)object)._end;
    }
 
    /**
     * {@inheritDoc}
+    *
     * @return "TextSpan for span [start index] to [end index]"
     */
    @Override

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/MultiTextSpan.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/MultiTextSpan.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/MultiTextSpan.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/MultiTextSpan.java Wed Sep 10 15:27:24 2014
@@ -26,7 +26,7 @@ import java.util.Collection;
  * A useful key for hash collections based upon start and end indices and missing internal spans.
  * This is faster than using String as {@link String#hashCode()}
  * iterates over the internal character array of a new string (new(..), .substring(..), .lowercase(..), ...).
- *
+ * <p/>
  * There is a much better version of this in org.chboston.chip.nlp.annotation but this will do for now.
  */
 @Immutable
@@ -39,8 +39,9 @@ final public class MultiTextSpan impleme
 
    /**
     * Given span indices should be ordered start < end, but it is not an absolute requirement.
+    *
     * @param start start index of a span, be it of a string or other
-    * @param end end index of a span,  be it of a  string or other
+    * @param end   end index of a span,  be it of a  string or other
     */
    public MultiTextSpan( final int start, final int end, final Collection<TextSpan> missingSpans ) {
       _start = start;
@@ -65,6 +66,20 @@ final public class MultiTextSpan impleme
       return _end;
    }
 
+   /**
+    * return the length of the full span minus the lengths of the missing spans
+    * {@inheritDoc}
+    */
+   @Override
+   public int getLength() {
+      int length = _end - _start + 1;
+      for  ( TextSpan missingSpan : _missingSpans ) {
+         length -= missingSpan.getLength();
+      }
+      return length;
+   }
+
+
 
    public Collection<TextSpan> getMissingSpans() {
       return _missingSpans;
@@ -72,6 +87,7 @@ final public class MultiTextSpan impleme
 
    /**
     * {@inheritDoc}
+    *
     * @return a hashcode based upon the start and end indices of this span key
     */
    @Override
@@ -81,18 +97,20 @@ final public class MultiTextSpan impleme
 
    /**
     * {@inheritDoc}
+    *
     * @return true iff the start keys are equal and the end keys are equal
     */
    @Override
    public boolean equals( final Object object ) {
       return object instanceof MultiTextSpan
-            && _start == ((MultiTextSpan)object)._start
-            && _end == ((MultiTextSpan)object)._end
-            && _missingSpans.equals( ((MultiTextSpan) object)._missingSpans );
+             && _start == ((MultiTextSpan)object)._start
+             && _end == ((MultiTextSpan)object)._end
+             && _missingSpans.equals( ((MultiTextSpan)object)._missingSpans );
    }
 
    /**
     * {@inheritDoc}
+    *
     * @return "Discontiguous TextSpan for span [start index] to [end index] but missing:\n[missing spans]"
     */
    @Override

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/TextSpan.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/TextSpan.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/TextSpan.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/textspan/TextSpan.java Wed Sep 10 15:27:24 2014
@@ -24,6 +24,7 @@ package org.apache.ctakes.dictionary.loo
  * Date: 1/29/14
  */
 public interface TextSpan {
+
    /**
     * @return the start index used for this text span
     */
@@ -33,4 +34,10 @@ public interface TextSpan {
     * @return the end index used for this text span
     */
    int getEnd();
+
+   /**
+    * @return the length of the text span in characters
+    */
+   int getLength();
+
 }

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java?rev=1624032&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/CuiCodeUtil.java Wed Sep 10 15:27:24 2014
@@ -0,0 +1,60 @@
+package org.apache.ctakes.dictionary.lookup2.util;
+
+import java.util.Collection;
+import java.util.HashSet;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 9/5/2014
+ */
+final public class CuiCodeUtil {
+
+   private CuiCodeUtil() {
+   }
+
+   static public String getAsCui( final Long code ) {
+      final StringBuilder sb = new StringBuilder( 8 );
+      sb.append( code );
+      return getAsCui( sb );
+   }
+
+   static public String getAsCui( final String code ) {
+      if ( code.length() == 8 && code.startsWith( "C" ) ) {
+         return code;
+      }
+      final StringBuilder sb = new StringBuilder( 8 );
+      sb.append( code.replace( "C", "" ) );
+      return getAsCui( sb );
+   }
+
+   static private String getAsCui( final StringBuilder sb ) {
+      while ( sb.length() < 7 ) {
+         sb.insert( 0, '0' );
+      }
+      sb.insert( 0, 'C' );
+      return sb.toString();
+   }
+
+
+   static public Long getCuiCode( final String cui ) {
+      final String cuiText = getAsCui( cui );
+      final String cuiNum = cuiText.substring( 1, cuiText.length() );
+      try {
+         return Long.parseLong( cuiNum );
+      } catch ( NumberFormatException nfE ) {
+         System.err.println( "Could not create Cui Code for " + cui );
+      }
+      return -1l;
+   }
+
+   static public Collection<Long> getCuiCodes( final Collection<String> cuis ) {
+      final Collection<Long> cuiCodes = new HashSet<>( cuis.size() );
+      for ( String cui : cuis ) {
+         cuiCodes.add( getCuiCode( cui ) );
+      }
+      return cuiCodes;
+   }
+
+
+}

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/DictionarySpec.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/DictionarySpec.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/DictionarySpec.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/DictionarySpec.java Wed Sep 10 15:27:24 2014
@@ -18,31 +18,140 @@
  */
 package org.apache.ctakes.dictionary.lookup2.util;
 
+import org.apache.ctakes.dictionary.lookup2.concept.Concept;
+import org.apache.ctakes.dictionary.lookup2.concept.ConceptFactory;
 import org.apache.ctakes.dictionary.lookup2.consumer.TermConsumer;
 import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
+import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
 
 import javax.annotation.concurrent.Immutable;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
 
 /**
  * Simple Container class that holds a {@link org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary}
- * collection and a {@link org.apache.ctakes.dictionary.lookup2.consumer.TermConsumer}
+ * collection, a {@link org.apache.ctakes.dictionary.lookup2.concept.ConceptFactory}
+ * and a {@link org.apache.ctakes.dictionary.lookup2.consumer.TermConsumer}
  */
 @Immutable
 final public class DictionarySpec {
-   final private Collection<RareWordDictionary> _dictionaries;
+
+   static private final RareWordDictionary EMPTY_DICTIONARY = new RareWordDictionary() {
+      public String getName() {
+         return "Empty Dictionary";
+      }
+
+      public Collection<RareWordTerm> getRareWordHits( final FastLookupToken fastLookupToken ) {
+         return Collections.emptySet();
+      }
+
+      public Collection<RareWordTerm> getRareWordHits( final String rareWordText ) {
+         return Collections.emptySet();
+      }
+   };
+
+   static private final ConceptFactory EMPTY_CONCEPT_FACTORY = new ConceptFactory() {
+      public String getName() {
+         return "Empty Concept Factory";
+      }
+
+      public Concept createConcept( final Long cuiCode ) {
+         return new Concept( CuiCodeUtil.getAsCui( cuiCode ) );
+      }
+
+      public Map<Long, Concept> createConcepts( final Collection<Long> cuiCodes ) {
+         return Collections.emptyMap();
+      }
+   };
+
+   final Collection<String> _pairNames;
+   final Map<String, String> _pairDictionaryNames;
+   final Map<String, String> _pairConceptFactoryNames;
+   final Map<String, RareWordDictionary> _dictionaries;
+   final Map<String, ConceptFactory> _conceptFactories;
+
    final private TermConsumer _termConsumer;
-   public DictionarySpec( final Collection<RareWordDictionary> dictionaries,
+
+   /**
+    * @param termConsumer the consumer to add terms to the Cas
+    */
+   public DictionarySpec( final Map<String, String> pairDictionaryNames,
+                          final Map<String, String> pairConceptFactoryNames,
+                          final Map<String, RareWordDictionary> dictionaries,
+                          final Map<String, ConceptFactory> conceptFactories,
                           final TermConsumer termConsumer ) {
+      _pairNames = new HashSet<>( pairDictionaryNames.keySet() );
+      _pairNames.addAll( pairConceptFactoryNames.keySet() );
+      // TODO check for completion of pairings
+      _pairDictionaryNames = pairDictionaryNames;
+      _pairConceptFactoryNames = pairConceptFactoryNames;
       _dictionaries = dictionaries;
+      _conceptFactories = conceptFactories;
       _termConsumer = termConsumer;
    }
 
+   public Collection<String> getPairNames() {
+      return _pairNames;
+   }
+
    /**
-    * @return all dictionaries to use for term lookup
+    * @return the dictionary for the given pair name
     */
+   public RareWordDictionary getDictionary( final String pairName ) {
+      final String dictionaryName = _pairDictionaryNames.get( pairName );
+      if ( dictionaryName != null ) {
+         final RareWordDictionary dictionary = _dictionaries.get( dictionaryName );
+         if ( dictionary != null ) {
+            return dictionary;
+         }
+      }
+      // TODO log
+      return EMPTY_DICTIONARY;
+   }
+
+   /**
+    * @return the concept factory for concept creation
+    */
+   public ConceptFactory getConceptFactory( final String pairName ) {
+      final String conceptFactoryName = _pairConceptFactoryNames.get( pairName );
+      if ( conceptFactoryName != null ) {
+         final ConceptFactory conceptFactory = _conceptFactories.get( conceptFactoryName );
+         if ( conceptFactory != null ) {
+            return conceptFactory;
+         }
+      }
+      // TODO log
+      return EMPTY_CONCEPT_FACTORY;
+   }
+
+   public Collection<RareWordDictionary> getPairedDictionaries( final String conceptFactoryName ) {
+      final Collection<RareWordDictionary> dictionaries = new HashSet<>();
+      for ( Map.Entry<String, String> pairConceptFactoryName : _pairConceptFactoryNames.entrySet() ) {
+         if ( pairConceptFactoryName.getValue().equals( conceptFactoryName ) ) {
+            dictionaries.add( getDictionary( pairConceptFactoryName.getKey() ) );
+         }
+      }
+      return dictionaries;
+   }
+
+   public Collection<ConceptFactory> getPairedConceptFactories( final String dictionaryName ) {
+      final Collection<ConceptFactory> conceptFactories = new HashSet<>();
+      for ( Map.Entry<String, String> pairDictionaryName : _pairDictionaryNames.entrySet() ) {
+         if ( pairDictionaryName.getValue().equals( dictionaryName ) ) {
+            conceptFactories.add( getConceptFactory( pairDictionaryName.getKey() ) );
+         }
+      }
+      return conceptFactories;
+   }
+
    public Collection<RareWordDictionary> getDictionaries() {
-      return _dictionaries;
+      return new HashSet<>( _dictionaries.values() );
+   }
+
+   public Collection<ConceptFactory> getConceptFactories() {
+      return new HashSet<>( _conceptFactories.values() );
    }
 
    /**
@@ -51,4 +160,5 @@ final public class DictionarySpec {
    public TermConsumer getConsumer() {
       return _termConsumer;
    }
+
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/FastLookupToken.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/FastLookupToken.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/FastLookupToken.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/FastLookupToken.java Wed Sep 10 15:27:24 2014
@@ -74,6 +74,13 @@ final public class FastLookupToken {
    }
 
    /**
+    * @return the length of the text span in characters
+    */
+   public int getLength() {
+      return _textSpan.getLength();
+   }
+
+   /**
     * @return the actual text in the document for the lookup token, in lowercase
     */
    public String getText() {
@@ -89,12 +96,13 @@ final public class FastLookupToken {
 
    /**
     * Two lookup tokens are equal iff the spans are equal.
+    *
     * @param value -
     * @return true if {@code value} is a {@code FastLookupToken} and has a span equal to this token's span
     */
    public boolean equals( final Object value ) {
       return value != null && value instanceof FastLookupToken
-            && _textSpan.equals( ((FastLookupToken)value).getTextSpan() );
+             && _textSpan.equals( ((FastLookupToken)value).getTextSpan() );
    }
 
    /**

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/LookupUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/LookupUtil.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/LookupUtil.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/LookupUtil.java Wed Sep 10 15:27:24 2014
@@ -28,31 +28,33 @@ import java.util.List;
  */
 final public class LookupUtil {
 
-   private LookupUtil() {}
+   private LookupUtil() {
+   }
 
 
    /**
     * Splits a string using a character.  Faster than String.split( regex )
+    *
     * @param line full text to split
-    * @param c character at which to split
+    * @param c    character at which to split
     * @return array of substrings or the original line if there are no characters c
     */
    static public String[] fastSplit( final String line, final char c ) {
       int nextSplit = line.indexOf( c );
       if ( nextSplit < 0 ) {
-         return new String[]{line};
+         return new String[]{ line };
       }
       final List<String> splits = new ArrayList<String>();
       int lastSplit = -1;
       while ( nextSplit > 0 ) {
-         splits.add( line.substring( lastSplit+1, nextSplit ) );
+         splits.add( line.substring( lastSplit + 1, nextSplit ) );
          lastSplit = nextSplit;
-         nextSplit = line.indexOf( c, lastSplit+1 );
+         nextSplit = line.indexOf( c, lastSplit + 1 );
       }
-      if ( lastSplit+1 < line.length() ) {
-         splits.add( line.substring( lastSplit+1 ) );
+      if ( lastSplit + 1 < line.length() ) {
+         splits.add( line.substring( lastSplit + 1 ) );
       }
-      return splits.toArray( new String[ splits.size() ] );
+      return splits.toArray( new String[splits.size()] );
    }
 
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/SemanticUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/SemanticUtil.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/SemanticUtil.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/SemanticUtil.java Wed Sep 10 15:27:24 2014
@@ -27,14 +27,15 @@ import java.util.HashSet;
 /**
  * Utility class to aid in the handling of semantic groups, semantic types, and tuis.
  * Used most by the term consumers.
- *
+ * <p/>
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 2/25/14
  */
 final public class SemanticUtil {
 
-   private SemanticUtil() {}
+   private SemanticUtil() {
+   }
 
    // cTakes types
    static private final String[] DRUG = { "T109", "T110", "T114", "T115", "T116", "T118", "T119",
@@ -44,7 +45,7 @@ final public class SemanticUtil {
    static private final String[] FIND = { "T033", "T034", "T040", "T041", "T042", "T043", "T044", "T045", "T046",
                                           "T056", "T057", "T184" };
    static private final String[] PROC = { "T059", "T060", "T061" };
-   static private final String[] ANAT = { "T021","T022", "T023", "T024", "T025", "T026", "T029", "T030" };
+   static private final String[] ANAT = { "T021", "T022", "T023", "T024", "T025", "T026", "T029", "T030" };
 
    // non-cTakes types
    // cTakes ID 7.  What is Clinical Attribute?  Just the single [standard] type?
@@ -57,68 +58,91 @@ final public class SemanticUtil {
    //   static private final String[] PHEN = { "T034", "T038", "T068", "T069", "T067", "T070" };
 
 
-   static private final Collection<String> ANAT_TUIS = new HashSet<String>( Arrays.asList( ANAT ) );
-   static private final Collection<String> DISO_TUIS = new HashSet<String>( Arrays.asList( DISO ) );
-   static private final Collection<String> FIND_TUIS = new HashSet<String>( Arrays.asList( FIND ) );
-   static private final Collection<String> PROC_TUIS = new HashSet<String>( Arrays.asList( PROC ) );
-   static private final Collection<String> DRUG_TUIS = new HashSet<String>( Arrays.asList( DRUG ) );
-
-
-   static public final String UNKNOWN_SEMANTIC_GROUP = "UNKNOWN_SEMANTIC_GROUP";
-   static public final String UNKNOWN_SEMANTIC_ZERO = "0";
-
+   static private final Collection<String> ANAT_TUIS = new HashSet<>( Arrays.asList( ANAT ) );
+   static private final Collection<String> DISO_TUIS = new HashSet<>( Arrays.asList( DISO ) );
+   static private final Collection<String> FIND_TUIS = new HashSet<>( Arrays.asList( FIND ) );
+   static private final Collection<String> PROC_TUIS = new HashSet<>( Arrays.asList( PROC ) );
+   static private final Collection<String> DRUG_TUIS = new HashSet<>( Arrays.asList( DRUG ) );
+
+
+//   static public final String UNKNOWN_SEMANTIC_GROUP = "UNKNOWN_SEMANTIC_GROUP";
+//   static public final String UNKNOWN_SEMANTIC_ZERO = "0";
+
+
+//   /**
+//    * cTakes IdentifiedAnnotation only accepts an integer as a typeId, which historically map to cTakes semantic groups
+//    *
+//    * @param entityType the text name of the semantic group or type
+//    * @return the integer value of the entity type or {@code CONST.NE_TYPE_ID_UNKNOWN} if none or improperly formed
+//    */
+//   static public int getSemanticGroupId( final String entityType ) {
+//      if ( entityType == null || entityType.isEmpty() ) {
+//         return CONST.NE_TYPE_ID_UNKNOWN;
+//      }
+//      if ( entityType.equalsIgnoreCase( "DRUG" ) ) {
+//         return CONST.NE_TYPE_ID_DRUG;
+//      } else if ( entityType.equalsIgnoreCase( "DISO" ) ) {
+//         return CONST.NE_TYPE_ID_DISORDER;
+//      } else if ( entityType.equalsIgnoreCase( "FIND" ) ) {
+//         return CONST.NE_TYPE_ID_FINDING;
+//      } else if ( entityType.equalsIgnoreCase( "PROC" ) ) {
+//         return CONST.NE_TYPE_ID_PROCEDURE;
+//      } else if ( entityType.equalsIgnoreCase( "ANAT" ) ) {
+//         return CONST.NE_TYPE_ID_ANATOMICAL_SITE;
+//      }
+//      try {
+//         return Integer.parseInt( entityType );
+//      } catch ( NumberFormatException nfe ) {
+//         return CONST.NE_TYPE_ID_UNKNOWN;
+//      }
+//   }
+//
+//   /**
+//    * Sometimes a
+//    *
+//    * @param tuis a comma-delimited collection of tuis that apply to some annotation
+//    * @return all cTakes groups for the given tuis
+//    */
+//   static public Collection<Integer> getSemanticGroupIdFromTui( final String tuis ) {
+//      final Collection<Integer> typeIds = new HashSet<>( 1 );
+//      final String[] splits = LookupUtil.fastSplit( tuis, ',' );
+//      for ( String tui : splits ) {
+//         if ( ANAT_TUIS.contains( tui ) ) {
+//            typeIds.add( CONST.NE_TYPE_ID_ANATOMICAL_SITE );
+//         } else if ( DISO_TUIS.contains( tui ) ) {
+//            typeIds.add( CONST.NE_TYPE_ID_DISORDER );
+//         } else if ( FIND_TUIS.contains( tui ) ) {
+//            typeIds.add( CONST.NE_TYPE_ID_FINDING );
+//         } else if ( PROC_TUIS.contains( tui ) ) {
+//            typeIds.add( CONST.NE_TYPE_ID_PROCEDURE );
+//         } else if ( DRUG_TUIS.contains( tui ) ) {
+//            typeIds.add( CONST.NE_TYPE_ID_DRUG );
+//         } else {
+//            typeIds.add( CONST.NE_TYPE_ID_UNKNOWN );
+//         }
+//      }
+//      return typeIds;
+//   }
 
    /**
-    * cTakes IdentifiedAnnotation only accepts an integer as a typeId, which historically map to cTakes semantic groups
-    * @param entityType the text name of the semantic group or type
-    * @return the integer value of the entity type or {@code CONST.NE_TYPE_ID_UNKNOWN} if none or improperly formed
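+    * Maps a single tui to its cTakes semantic group.
+    *
+    * @param tui a single tui, such as "T021", that applies to some annotation
+    * @return the cTakes group for the given tui, or {@code CONST.NE_TYPE_ID_UNKNOWN} if the tui is in no known group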
     */
-   static public int getSemanticGroupId( final String entityType ) {
-      if ( entityType == null || entityType.isEmpty() ) {
-         return CONST.NE_TYPE_ID_UNKNOWN;
-      }
-      if ( entityType.equalsIgnoreCase( "DRUG" ) ) {
-         return CONST.NE_TYPE_ID_DRUG;
-      } else if ( entityType.equalsIgnoreCase( "DISO" ) ) {
+   static public Integer getTuiSemanticGroupId( final String tui ) {
+      if ( ANAT_TUIS.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_ANATOMICAL_SITE;
+      } else if ( DISO_TUIS.contains( tui ) ) {
          return CONST.NE_TYPE_ID_DISORDER;
-      } else if ( entityType.equalsIgnoreCase( "FIND" ) ) {
+      } else if ( FIND_TUIS.contains( tui ) ) {
          return CONST.NE_TYPE_ID_FINDING;
-      } else if ( entityType.equalsIgnoreCase( "PROC" ) ) {
+      } else if ( PROC_TUIS.contains( tui ) ) {
          return CONST.NE_TYPE_ID_PROCEDURE;
-      } else if ( entityType.equalsIgnoreCase( "ANAT" ) ) {
-         return CONST.NE_TYPE_ID_ANATOMICAL_SITE;
-      }
-      try {
-         return Integer.parseInt( entityType );
-      } catch ( NumberFormatException nfe ) {
-         return CONST.NE_TYPE_ID_UNKNOWN;
-      }
-   }
-
-   /**
-    * Sometimes a
-    * @param tuis a comma-delimited collection of tuis that apply to some annotation
-    * @return all cTakes groups for the given tuis
-    */
-   static public Collection<Integer> getSemanticGroupIdFromTui( final String tuis ) {
-      final Collection<Integer> typeIds = new HashSet<Integer>( 1 );
-      final String[] splits = LookupUtil.fastSplit( tuis, ',' );
-      for ( String tui : splits ) {
-         if ( ANAT_TUIS.contains( tui ) ) {
-            typeIds.add( CONST.NE_TYPE_ID_ANATOMICAL_SITE );
-         } else if ( DISO_TUIS.contains( tui ) ) {
-            typeIds.add( CONST.NE_TYPE_ID_DISORDER );
-         } else if ( FIND_TUIS.contains( tui ) ) {
-            typeIds.add( CONST.NE_TYPE_ID_FINDING );
-         } else if ( PROC_TUIS.contains( tui ) ) {
-            typeIds.add( CONST.NE_TYPE_ID_PROCEDURE );
-         } else if ( DRUG_TUIS.contains( tui ) ) {
-            typeIds.add( CONST.NE_TYPE_ID_DRUG );
-         } else {
-            typeIds.add( CONST.NE_TYPE_ID_UNKNOWN );
-         }
+      } else if ( DRUG_TUIS.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_DRUG;
       }
-      return typeIds;
+      return CONST.NE_TYPE_ID_UNKNOWN;
    }
 
 

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TokenMatchUtil.java Wed Sep 10 15:27:24 2014
@@ -24,33 +24,35 @@ import java.util.List;
 
 /**
  * Utility class with methods for matching tokens to valid terms
- *
+ * <p/>
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 2/25/14
  */
 final public class TokenMatchUtil {
 
-   private TokenMatchUtil() {}
+   private TokenMatchUtil() {
+   }
 
 
    /**
     * Hopefully the jit will inline this method
-    * @param rareWordHit rare word term to check for match
-    * @param allTokens all tokens in a window
+    *
+    * @param rareWordHit    rare word term to check for match
+    * @param allTokens      all tokens in a window
     * @param termStartIndex index of first token in allTokens to check
-    * @param termEndIndex index of last token in allTokens to check
+    * @param termEndIndex   index of last token in allTokens to check
     * @return true if the rare word term exists in allTokens within the given indices
     */
    public static boolean isTermMatch( final RareWordTerm rareWordHit, final List<FastLookupToken> allTokens,
                                       final int termStartIndex, final int termEndIndex ) {
       final char[] rareWordHitChars = rareWordHit.getText().toCharArray();
       int hitCharIndex = 0;
-      for ( int i=termStartIndex; i<termEndIndex+1; i++ ) {
+      for ( int i = termStartIndex; i < termEndIndex + 1; i++ ) {
          final char[] tokenChars = allTokens.get( i ).getText().toCharArray();
          if ( isTokenMatch( rareWordHitChars, hitCharIndex, tokenChars ) ) {
             // the normal token matched, move to the next token
-            hitCharIndex += tokenChars.length+1;
+            hitCharIndex += tokenChars.length + 1;
             continue;
          }
          if ( allTokens.get( i ).getVariant() == null ) {
@@ -60,7 +62,7 @@ final public class TokenMatchUtil {
          final char[] variantChars = allTokens.get( i ).getVariant().toCharArray();
          if ( isTokenMatch( rareWordHitChars, hitCharIndex, variantChars ) ) {
             // the variant matched, move to the next token
-            hitCharIndex += variantChars.length+1;
+            hitCharIndex += variantChars.length + 1;
             continue;
          }
          // the normal token didn't match and the variant didn't match
@@ -73,18 +75,19 @@ final public class TokenMatchUtil {
    /**
     * Check the rare word term to see if a given token is at a given index within that term
     * Hopefully the jit will inline this method
+    *
     * @param rareWordHitChars character array of all characters for the entire possible term (all words)
     * @param hitCharIndex     character index in rare word term to check for token
     * @param tokenChars       character array of the search token
-    * @return                 true if rareWordHitChars contains tokenChars at location hitCharIndex
+    * @return true if rareWordHitChars contains tokenChars at location hitCharIndex
     */
    static private boolean isTokenMatch( final char[] rareWordHitChars, final int hitCharIndex,
-                                          final char[] tokenChars ) {
+                                        final char[] tokenChars ) {
       if ( hitCharIndex + tokenChars.length > rareWordHitChars.length ) {
          return false;
       }
-      for ( int tokenCharIndex = 0; tokenCharIndex<tokenChars.length; tokenCharIndex++ ) {
-         if ( tokenChars[tokenCharIndex] != rareWordHitChars[hitCharIndex+tokenCharIndex] ) {
+      for ( int tokenCharIndex = 0; tokenCharIndex < tokenChars.length; tokenCharIndex++ ) {
+         if ( tokenChars[tokenCharIndex] != rareWordHitChars[hitCharIndex + tokenCharIndex] ) {
             return false;
          }
       }

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java?rev=1624032&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/TuiCodeUtil.java Wed Sep 10 15:27:24 2014
@@ -0,0 +1,68 @@
+package org.apache.ctakes.dictionary.lookup2.util;
+
+import java.util.Collection;
+import java.util.HashSet;
+
+/**
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 9/5/2014
+ */
+final public class TuiCodeUtil {
+
+   private TuiCodeUtil() {
+   }
+
+   static public String getAsTui( final Integer code ) {
+      final StringBuilder sb = new StringBuilder( 4 );
+      sb.append( code );
+      return getAsTui( sb );
+   }
+
+   static public String getAsTui( final String code ) {
+      if ( code.length() == 4 && code.startsWith( "T" ) ) {
+         return code;
+      }
+      final StringBuilder sb = new StringBuilder( 4 );
+      sb.append( code.replace( "T", "" ) );
+      return getAsTui( sb );
+   }
+
+   static private String getAsTui( final StringBuilder sb ) {
+      while ( sb.length() < 3 ) {
+         sb.insert( 0, '0' );
+      }
+      sb.insert( 0, 'T' );
+      return sb.toString();
+   }
+
+
+   static public Collection<String> getIntAsTuis( final Collection<Integer> tuiCodes ) {
+      final Collection<String> tuis = new HashSet<>( tuiCodes.size() );
+      for ( Integer tuiCode : tuiCodes ) {
+         tuis.add( getAsTui( tuiCode ) );
+      }
+      return tuis;
+   }
+
+   static public Collection<String> getStringAsTuis( final Collection<String> tuiNums ) {
+      final Collection<String> tuis = new HashSet<>( tuiNums.size() );
+      for ( String tuiNum : tuiNums ) {
+         tuis.add( getAsTui( tuiNum ) );
+      }
+      return tuis;
+   }
+
+   static public Integer getTuiCode( final String tui ) {
+      final String tuiText = getAsTui( tui );
+      final String tuiNum = tuiText.substring( 1, tuiText.length() );
+      try {
+         return Integer.parseInt( tuiNum );
+      } catch ( NumberFormatException nfE ) {
+         System.err.println( "Could not create Tui Code for " + tui );
+      }
+      return -1;
+   }
+
+
+}

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/UmlsUserApprover.java Wed Sep 10 15:27:24 2014
@@ -18,25 +18,20 @@
  */
 package org.apache.ctakes.dictionary.lookup2.util;
 
+import org.apache.ctakes.utils.env.EnvironmentVariable;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.resource.ResourceInitializationException;
 
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-import java.io.UnsupportedEncodingException;
+import java.io.*;
 import java.net.URL;
 import java.net.URLConnection;
 import java.net.URLEncoder;
 
-import org.apache.ctakes.utils.env.EnvironmentVariable;
-
 
 /**
  * Used to validate UMLS license / user.
- *
+ * <p/>
  * TODO  Authentication before download would be nice, or perhaps an encrypted download
  * Author: SPF
  * Affiliation: CHIP-NLP
@@ -52,10 +47,12 @@ final public class UmlsUserApprover {
 
    static final private Logger LOGGER = Logger.getLogger( "UmlsUserApprover" );
 
-   private UmlsUserApprover() {}
+   private UmlsUserApprover() {
+   }
 
    /**
     * Silently validate the UMLS license / user
+    *
     * @param aContext contains information about the UMLS license / user
     * @throws ResourceInitializationException if the validation does not pass
     */
@@ -67,10 +64,10 @@ final public class UmlsUserApprover {
       LOGGER.info( "Using " + UMLSADDR_PARAM + ": " + umlsAddress + ": " + umlsUser );
       if ( !isValidUMLSUser( umlsAddress, umlsVendor, umlsUser, umlsPassword ) ) {
          LOGGER.error( "Error: Invalid UMLS License.  " +
-                        "A UMLS License is required to use the UMLS dictionary lookup. \n" +
-                        "Error: You may request one at: https://uts.nlm.nih.gov/license.html \n" +
-                        "Please verify your UMLS license settings in the " +
-                        "DictionaryLookupAnnotatorUMLS.xml configuration." );
+                       "A UMLS License is required to use the UMLS dictionary lookup. \n" +
+                       "Error: You may request one at: https://uts.nlm.nih.gov/license.html \n" +
+                       "Please verify your UMLS license settings in the " +
+                       "DictionaryLookupAnnotatorUMLS.xml configuration." );
          throw new ResourceInitializationException( new Exception( "Failed to initilize.  Invalid UMLS License" ) );
       }
    }

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ArrayListMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ArrayListMap.java?rev=1624032&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ArrayListMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ArrayListMap.java Wed Sep 10 15:27:24 2014
@@ -0,0 +1,175 @@
+package org.apache.ctakes.dictionary.lookup2.util.collection;
+
+import java.util.*;
+
+/**
+ * HashMap-based {@link CollectionMap} that keeps the values of each key in an ArrayList, so
+ * insertion order and duplicate values are preserved per key.
+ * Author: SPF, Affiliation: CHIP-NLP, Date: 7/23/14
+ */
+final public class ArrayListMap<K, V> extends HashMap<K, List<V>> implements CollectionMap<K, V> {
+
+   public ArrayListMap() {
+      super();
+   }
+
+   /**
+    * @param size initial capacity handed to the backing HashMap
+    */
+   public ArrayListMap( final int size ) {
+      super( size );
+   }
+
+   /**
+    * {@inheritDoc}  Entry.setValue( .. ) and Iterator.remove() of the result silently do nothing.
+    */
+   @Override
+   public Iterator<Map.Entry<K, Collection<V>>> iterator() {
+      final Iterator<Map.Entry<K, List<V>>> setIterator = entrySet().iterator();
+      return new Iterator<Map.Entry<K, Collection<V>>>() {
+         public boolean hasNext() {
+            return setIterator.hasNext();
+         }
+
+         public Map.Entry<K, Collection<V>> next() {
+            final Map.Entry<K, List<V>> next = setIterator.next();
+            return new Map.Entry<K, Collection<V>>() {
+               public K getKey() {
+                  return next.getKey();
+               }
+
+               public Collection<V> getValue() {
+                  return next.getValue();
+               }
+
+               public Collection<V> setValue( final Collection<V> value ) {
+                  return null;   // the given value is ignored, the mapping is left untouched
+               }
+            };
+         }
+
+         public void remove() {   // no-op, nothing is removed from this map
+         }
+      };
+   }
+
+   /**
+    * {@inheritDoc}  One element per key: lists with equal content are all kept, never merged.
+    */
+   @Override
+   public Collection<Collection<V>> getAllCollections() {
+      return new ArrayList<Collection<V>>( values() );
+   }
+
+   /**
+    * {@inheritDoc}  An unknown key is not added to this map.
+    */
+   @Override
+   public Collection<V> getCollection( final K key ) {
+      final List<V> values = get( key );
+      if ( values == null ) {
+         return Collections.emptyList();
+      }
+      return values;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<V> obtainCollection( final K key ) {
+      List<V> values = get( key );
+      if ( values == null ) {
+         values = new ArrayList<>();
+         put( key, values );
+      }
+      return values;
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final K key, final V value ) {
+      final Collection<V> values = get( key );
+      return values != null && values.contains( value );
+   }
+
+   /**
+    * Appends value to the list mapped with key, creating the list first if the key is new.
+    *
+    * @param key   key for internal collection
+    * @param value value to append; duplicates are kept
+    * @return result of List.add, which is always <tt>true</tt> for an ArrayList
+    */
+   @Override
+   public boolean placeValue( final K key, final V value ) {
+      // obtainCollection is the single get-or-create point for the per-key lists
+      return obtainCollection( key ).add( value );
+   }
+
+   /**
+    * Places every entry of map through placeValue.
+    * @return <tt>true</tt> if at least one placeValue call returned <tt>true</tt>
+    */
+   @Override
+   public boolean placeMap( final Map<K, V> map ) {
+      boolean placedAny = false;
+      for ( Map.Entry<K, V> entry : map.entrySet() ) {
+         placedAny |= placeValue( entry.getKey(), entry.getValue() );
+      }
+      return placedAny;
+   }
+
+   /**
+    * {@inheritDoc}  Only the first occurrence is removed; an unknown key is ignored.
+    */
+   @Override
+   public void removeValue( final K key, final V value ) {
+      final List<V> values = get( key );
+      if ( values == null ) {
+         return;
+      }
+      values.remove( value );
+   }
+
+   /**
+    * Appends every element of collection to the list mapped with key, creating the list if needed.
+    *
+    * @param key        key for internal collection
+    * @param collection values to append
+    * @return growth of the list, i.e. the number of elements appended
+    */
+   @Override
+   public int addAllValues( final K key, final Collection<V> collection ) {
+      final Collection<V> values = obtainCollection( key );
+      final int oldSize = values.size();
+      values.addAll( collection );
+      return values.size() - oldSize;
+   }
+
+   /**
+    * {@inheritDoc}  The emptied list stays mapped with key; an unknown key is ignored.
+    */
+   @Override
+   public void clearCollection( final K key ) {
+      final List<V> values = get( key );
+      if ( values != null ) {
+         values.clear();
+      }
+   }
+
+   /**
+    * {@inheritDoc}  The copy is shallow: its values are the same list instances held by this map.
+    */
+   @Override
+   public Map<K, Collection<V>> toSimpleMap() {
+      final Map<K, Collection<V>> simpleMap = new HashMap<>( size() );
+      for ( K key : keySet() ) {
+         simpleMap.put( key, obtainCollection( key ) );
+      }
+      return simpleMap;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/CollectionMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/CollectionMap.java?rev=1624032&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/CollectionMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/CollectionMap.java Wed Sep 10 15:27:24 2014
@@ -0,0 +1,106 @@
+package org.apache.ctakes.dictionary.lookup2.util.collection;
+
+import java.util.Collection;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Map in which each key is associated with a collection of values instead of a single value.
+ * Author: SPF, Affiliation: CHIP-NLP
+ * Date: 6/24/14
+ */
+public interface CollectionMap<K, V> extends Iterable<Map.Entry<K, Collection<V>>> {
+
+   /** @return the keys of this CollectionMap */
+   public Set<K> keySet();
+
+   /**
+    * @return all of the collections for all keys
+    */
+   public Collection<Collection<V>> getAllCollections();
+
+
+   /**
+    * check the collection map for a key
+    *
+    * @param key key for internal collection
+    * @return <tt>true</tt> if this CollectionMap contains the key
+    */
+   public boolean containsKey( K key );
+
+   /**
+    * gets a collection mapped with key.  If one does not exist then an empty collection is returned
+    *
+    * @param key key for internal collection
+    * @return collection mapped with key or an empty collection if there is none
+    */
+   public Collection<V> getCollection( K key );
+
+   /**
+    * obtains a collection mapped with key.  If one does not exist then one is added to this CollectionMap
+    *
+    * @param key key for internal collection
+    * @return (possibly new) collection mapped with key
+    */
+   public Collection<V> obtainCollection( K key );
+
+   /**
+    * check the collection map for a key and value combination
+    *
+    * @param key   key for internal collection
+    * @param value value to check in internal collection
+    * @return <tt>true</tt> if this CollectionMap contains the value for the given key
+    */
+   public boolean containsValue( K key, V value );
+
+   /**
+    * places value into a collection mapped with key
+    *
+    * @param key   key for internal collection
+    * @param value value to place in internal collection
+    * @return <tt>true</tt> if the internal collection changed as a result of the call
+    */
+   public boolean placeValue( K key, V value );
+
+   /**
+    * places each value of a map into a collection mapped with the appropriate key
+    *
+    * @param map map to store
+    * @return <tt>true</tt> if placing at least one of the values returned <tt>true</tt>
+    */
+   public boolean placeMap( Map<K, V> map );
+
+   /**
+    * removes value from a collection mapped with key
+    *
+    * @param key   key for internal collection
+    * @param value value to remove from internal collection
+    */
+   public void removeValue( K key, V value );
+
+   /**
+    * adds everything from the given collection to the internal collection mapped with key
+    *
+    * @param key        key for internal collection
+    * @param collection collection of values to place in internal collection
+    * @return the number of new items added
+    */
+   public int addAllValues( K key, Collection<V> collection );
+
+   /**
+    * clears the collection mapped with key
+    *
+    * @param key key for internal collection
+    */
+   public void clearCollection( K key );
+
+   /**
+    * Copy of this object as a simple (java.util.Collection) map of Collection
+    * @return map of java.util.Collection
+    */
+   public Map<K, Collection<V>> toSimpleMap();
+
+   /** @return <tt>true</tt> if this CollectionMap holds no keys */
+   public boolean isEmpty();
+
+}

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/HashSetMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/HashSetMap.java?rev=1624032&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/HashSetMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/HashSetMap.java Wed Sep 10 15:27:24 2014
@@ -0,0 +1,177 @@
+package org.apache.ctakes.dictionary.lookup2.util.collection;
+
+import java.util.*;
+
+/**
+ * HashMap-based {@link CollectionMap} that keeps the values of each key in a HashSet, so a value
+ * is held at most once per key.
+ * Author: SPF, Affiliation: CHIP-NLP, Date: 6/24/14
+ */
+final public class HashSetMap<K, V> extends HashMap<K, Set<V>> implements CollectionMap<K, V> {
+
+   public HashSetMap() {
+      super();
+   }
+
+   /**
+    * @param size initial capacity handed to the backing HashMap
+    */
+   public HashSetMap( final int size ) {
+      super( size );
+   }
+
+
+   /**
+    * {@inheritDoc}  Entry.setValue( .. ) and Iterator.remove() of the result silently do nothing.
+    */
+   @Override
+   public Iterator<Map.Entry<K, Collection<V>>> iterator() {
+      final Iterator<Map.Entry<K, Set<V>>> setIterator = entrySet().iterator();
+      return new Iterator<Map.Entry<K, Collection<V>>>() {
+         public boolean hasNext() {
+            return setIterator.hasNext();
+         }
+
+         public Map.Entry<K, Collection<V>> next() {
+            final Map.Entry<K, Set<V>> next = setIterator.next();
+            return new Map.Entry<K, Collection<V>>() {
+               public K getKey() {
+                  return next.getKey();
+               }
+
+               public Collection<V> getValue() {
+                  return next.getValue();
+               }
+
+               public Collection<V> setValue( final Collection<V> value ) {
+                  return null;   // the given value is ignored, the mapping is left untouched
+               }
+            };
+         }
+
+         public void remove() {   // no-op, nothing is removed from this map
+         }
+      };
+   }
+
+   /**
+    * {@inheritDoc}  One element per key: sets with equal content are all kept, never merged.
+    */
+   @Override
+   public Collection<Collection<V>> getAllCollections() {
+      return new ArrayList<Collection<V>>( values() );
+   }
+
+   /**
+    * {@inheritDoc}  An unknown key is not added to this map.
+    */
+   @Override
+   public Collection<V> getCollection( final K key ) {
+      final Set<V> values = get( key );
+      if ( values == null ) {
+         return Collections.emptySet();
+      }
+      return values;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<V> obtainCollection( final K key ) {
+      Set<V> values = get( key );
+      if ( values == null ) {
+         values = new HashSet<>();
+         put( key, values );
+      }
+      return values;
+   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final K key, final V value ) {
+      final Collection<V> values = get( key );
+      return values != null && values.contains( value );
+   }
+
+   /**
+    * Adds value to the set mapped with key, creating the set first if the key is new.
+    *
+    * @param key   key for internal collection
+    * @param value value to add
+    * @return <tt>true</tt> if the set for key did not already contain the value
+    */
+   @Override
+   public boolean placeValue( final K key, final V value ) {
+      // obtainCollection is the single get-or-create point for the per-key sets
+      return obtainCollection( key ).add( value );
+   }
+
+   /**
+    * Places every entry of map through placeValue.
+    * @return <tt>true</tt> if at least one placeValue call returned <tt>true</tt>
+    */
+   @Override
+   public boolean placeMap( final Map<K, V> map ) {
+      boolean placedAny = false;
+      for ( Map.Entry<K, V> entry : map.entrySet() ) {
+         placedAny |= placeValue( entry.getKey(), entry.getValue() );
+      }
+      return placedAny;
+   }
+
+   /**
+    * {@inheritDoc}  An unknown key is ignored.
+    */
+   @Override
+   public void removeValue( final K key, final V value ) {
+      final Set<V> values = get( key );
+      if ( values == null ) {
+         return;
+      }
+      values.remove( value );
+   }
+
+   /**
+    * Adds every element of collection to the set mapped with key, creating the set if needed.
+    *
+    * @param key        key for internal collection
+    * @param collection values to add
+    * @return growth of the set, i.e. the number of values that were not already in it
+    */
+   @Override
+   public int addAllValues( final K key, final Collection<V> collection ) {
+      final Collection<V> values = obtainCollection( key );
+      final int oldSize = values.size();
+      values.addAll( collection );
+      return values.size() - oldSize;
+   }
+
+   /**
+    * {@inheritDoc}  The emptied set stays mapped with key; an unknown key is ignored.
+    */
+   @Override
+   public void clearCollection( final K key ) {
+      final Set<V> values = get( key );
+      if ( values != null ) {
+         values.clear();
+      }
+   }
+
+
+   /**
+    * {@inheritDoc}  The copy is shallow: its values are the same set instances held by this map.
+    */
+   @Override
+   public Map<K, Collection<V>> toSimpleMap() {
+      final Map<K, Collection<V>> simpleMap = new HashMap<>( size() );
+      for ( K key : keySet() ) {
+         simpleMap.put( key, obtainCollection( key ) );
+      }
+      return simpleMap;
+   }
+
+}

Added: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ImmutableCollectionMap.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ImmutableCollectionMap.java?rev=1624032&view=auto
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ImmutableCollectionMap.java (added)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/util/collection/ImmutableCollectionMap.java Wed Sep 10 15:27:24 2014
@@ -0,0 +1,189 @@
+package org.apache.ctakes.dictionary.lookup2.util.collection;
+
+import javax.annotation.concurrent.Immutable;
+import java.util.*;
+
+/**
+ * Read-only view of another {@link CollectionMap}: every mutator throws UnsupportedOperationException and
+ * every collection handed out is unmodifiable.  The wrapped map is not copied, so changes made directly
+ * to it show through this view.
+ * Author: SPF
+ * Affiliation: CHIP-NLP
+ * Date: 9/5/2014
+ */
+@Immutable
+final public class ImmutableCollectionMap<K, V> implements CollectionMap<K, V> {
+
+   final private CollectionMap<K, V> _protectedMap;
+
+   /**
+    * @param collectionMap map to expose read-only; it is wrapped, not copied
+    */
+   public ImmutableCollectionMap( final CollectionMap<K, V> collectionMap ) {
+      _protectedMap = collectionMap;
+   }
+
+   /**
+    * Sets and Lists keep their kind so that equals and hashCode of the view still compare contents.
+    *
+    * @param values collection to guard, may be null
+    * @return unmodifiable view of values, or null if values is null
+    */
+   static private <T> Collection<T> readOnly( final Collection<T> values ) {
+      if ( values instanceof Set ) {
+         return Collections.unmodifiableSet( (Set<T>)values );
+      }
+      if ( values instanceof List ) {
+         return Collections.unmodifiableList( (List<T>)values );
+      }
+      return values == null ? null : Collections.unmodifiableCollection( values );
+   }
+
+   /**
+    * @return iterator over entries whose values are unmodifiable views; its remove() and the entries'
+    * setValue( .. ) throw UnsupportedOperationException
+    */
+   @Override
+   public Iterator<Map.Entry<K, Collection<V>>> iterator() {
+      final Iterator<Map.Entry<K, Collection<V>>> entries = _protectedMap.iterator();
+      return new Iterator<Map.Entry<K, Collection<V>>>() {
+         public boolean hasNext() {
+            return entries.hasNext();
+         }
+
+         public Map.Entry<K, Collection<V>> next() {
+            final Map.Entry<K, Collection<V>> entry = entries.next();
+            return new AbstractMap.SimpleImmutableEntry<K, Collection<V>>( entry.getKey(),
+                                                                            readOnly( entry.getValue() ) );
+         }
+
+         public void remove() {
+            throw new UnsupportedOperationException();
+         }
+      };
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Set<K> keySet() {
+      return Collections.unmodifiableSet( _protectedMap.keySet() );
+   }
+
+   /**
+    * {@inheritDoc}  The result and each collection inside it are unmodifiable.
+    */
+   @Override
+   public Collection<Collection<V>> getAllCollections() {
+      final Collection<Collection<V>> all = _protectedMap.getAllCollections();
+      final List<Collection<V>> guarded = new ArrayList<>( all.size() );
+      for ( Collection<V> values : all ) {
+         guarded.add( readOnly( values ) );
+      }
+      return Collections.unmodifiableList( guarded );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsKey( final K key ) {
+      return _protectedMap.containsKey( key );
+   }
+
+   /**
+    * {@inheritDoc}  The returned collection is an unmodifiable view.
+    */
+   @Override
+   public Collection<V> getCollection( final K key ) {
+      return readOnly( _protectedMap.getCollection( key ) );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean containsValue( final K key, final V value ) {
+      return _protectedMap.containsValue( key, value );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @throws java.lang.UnsupportedOperationException
+    */
+   @Override
+   public boolean placeValue( final K key, final V value ) {
+      throw new UnsupportedOperationException();
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @throws java.lang.UnsupportedOperationException
+    */
+   @Override
+   public boolean placeMap( final Map<K, V> map ) {
+      throw new UnsupportedOperationException();
+   }
+
+   /**
+    * Never adds a collection to this view: behaves exactly like getCollection( key ).
+    */
+   @Override
+   public Collection<V> obtainCollection( final K key ) {
+      return getCollection( key );
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @throws java.lang.UnsupportedOperationException
+    */
+   @Override
+   public void removeValue( final K key, final V value ) {
+      throw new UnsupportedOperationException();
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @throws java.lang.UnsupportedOperationException
+    */
+   @Override
+   public int addAllValues( final K key, final Collection<V> collection ) {
+      throw new UnsupportedOperationException();
+   }
+
+   /**
+    * {@inheritDoc}
+    *
+    * @throws java.lang.UnsupportedOperationException
+    */
+   @Override
+   public void clearCollection( final K key ) {
+      throw new UnsupportedOperationException();
+   }
+
+   /**
+    * @return new, caller-owned map from each key to an unmodifiable view of its collection
+    */
+   @Override
+   public Map<K, Collection<V>> toSimpleMap() {
+      final Map<K, Collection<V>> simpleMap = new HashMap<>();
+      for ( Map.Entry<K, Collection<V>> entry : this ) {
+         simpleMap.put( entry.getKey(), entry.getValue() );
+      }
+      return simpleMap;
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public boolean isEmpty() {
+      return _protectedMap.isEmpty();
+   }
+
+}



Mime
View raw message