ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1624032 [2/3] - in /ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2: ae/ concept/ consumer/ dictionary/ relation/ term/ textspan/ util/ util/collection/
Date Wed, 10 Sep 2014 15:27:25 GMT
Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/PrecisionTermConsumer.java Wed Sep 10 15:27:24 2014
@@ -18,25 +18,21 @@
  */
 package org.apache.ctakes.dictionary.lookup2.consumer;
 
+import org.apache.ctakes.dictionary.lookup2.concept.Concept;
 import org.apache.ctakes.dictionary.lookup2.textspan.MultiTextSpan;
 import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
-import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
+import org.apache.ctakes.dictionary.lookup2.util.collection.HashSetMap;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
+import java.util.*;
 
 /**
  * Refine a collection of dictionary terms to only contain the most specific variations:
  * "colon cancer" instead of "cancer", performed by span inclusion / complete containment, not overlap
- *
+ * <p>
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 1/9/14
@@ -50,41 +46,47 @@ final public class PrecisionTermConsumer
       _idHitConsumer = new DefaultTermConsumer( uimaContext, properties );
    }
 
+
    /**
     * Only uses the largest spans for the type
     * {@inheritDoc}
     */
    @Override
-   protected void consumeTypeIdHits( final JCas jcas, final String codingScheme, final int typeId,
-                                     final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap )
+   protected void consumeTypeIdHits( final JCas jcas, final String codingScheme, final int cTakesSemantic,
+                                     final CollectionMap<TextSpan, Long> semanticTerms,
+                                     final CollectionMap<Long, Concept> conceptMap )
          throws AnalysisEngineProcessException {
-      final Map<TextSpan, Collection<RareWordTerm>> preciseHitMap = createPreciseHitMap( lookupHitMap );
-      _idHitConsumer.consumeTypeIdHits( jcas, codingScheme, typeId, preciseHitMap );
+      final CollectionMap<TextSpan, Long> preciseTerms = createPreciseTerms( semanticTerms );
+      _idHitConsumer.consumeTypeIdHits( jcas, codingScheme, cTakesSemantic, preciseTerms, conceptMap );
    }
 
+
    /**
     * Refine a collection of dictionary terms to only contain the most specific variations:
     * "colon cancer" instead of "cancer", performed by span inclusion /complete containment, not overlap
-    * @param lookupHitMap terms in the dictionary
+    *
+    * @param semanticTerms terms in the dictionary
     * @return terms with the longest spans
     */
-   static private Map<TextSpan, Collection<RareWordTerm>> createPreciseHitMap(
-         final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap ) {
-      final Collection<TextSpan> discardSpans = new HashSet<TextSpan>();
-      final List<TextSpan> textSpans = new ArrayList<TextSpan>( lookupHitMap.keySet() );
+   static private CollectionMap<TextSpan, Long> createPreciseTerms(
+         final CollectionMap<TextSpan, Long> semanticTerms ) {
+      final Collection<TextSpan> discardSpans = new HashSet<>();
+      final List<TextSpan> textSpans = new ArrayList<>( semanticTerms.keySet() );
       final int count = textSpans.size();
-      for ( int i=0; i<count; i++ ) {
+      for ( int i = 0; i < count; i++ ) {
          final TextSpan spanKeyI = textSpans.get( i );
-         for ( int j=i+1; j<count; j++ ) {
+         for ( int j = i + 1; j < count; j++ ) {
             final TextSpan spanKeyJ = textSpans.get( j );
             if ( (spanKeyJ.getStart() <= spanKeyI.getStart() && spanKeyJ.getEnd() > spanKeyI.getEnd())
-                  || (spanKeyJ.getStart() < spanKeyI.getStart() && spanKeyJ.getEnd() >= spanKeyI.getEnd()) ) {
+                 || (spanKeyJ.getStart() < spanKeyI.getStart() && spanKeyJ.getEnd() >= spanKeyI.getEnd()) ) {
                // J contains I, discard less precise concepts for span I and move on to next span I
                if ( spanKeyJ instanceof MultiTextSpan ) {
                   boolean spanIok = false;
                   for ( TextSpan missingSpanKey : ((MultiTextSpan)spanKeyJ).getMissingSpans() ) {
-                     if ( (missingSpanKey.getStart() >= spanKeyI.getStart() && missingSpanKey.getStart() < spanKeyI.getEnd())
-                           || (missingSpanKey.getEnd() > spanKeyI.getStart() && missingSpanKey.getEnd() <= spanKeyI.getEnd()) ) {
+                     if ( (missingSpanKey.getStart() >= spanKeyI.getStart()
+                           && missingSpanKey.getStart() < spanKeyI.getEnd())
+                          || (missingSpanKey.getEnd() > spanKeyI.getStart()
+                              && missingSpanKey.getEnd() <= spanKeyI.getEnd()) ) {
                         // I overlaps a missing span, so it is actually ok
                         spanIok = true;
                         break;
@@ -99,14 +101,16 @@ final public class PrecisionTermConsumer
                   break;
                }
             }
-            if ( ( (spanKeyI.getStart() <= spanKeyJ.getStart() && spanKeyI.getEnd() > spanKeyJ.getEnd() )
-                  || (spanKeyI.getStart() < spanKeyJ.getStart() && spanKeyI.getEnd() >= spanKeyJ.getEnd()) ) ) {
+            if ( ((spanKeyI.getStart() <= spanKeyJ.getStart() && spanKeyI.getEnd() > spanKeyJ.getEnd())
+                  || (spanKeyI.getStart() < spanKeyJ.getStart() && spanKeyI.getEnd() >= spanKeyJ.getEnd())) ) {
                // I contains J, discard less precise concepts for span J and move on to next span J
                if ( spanKeyI instanceof MultiTextSpan ) {
                   boolean spanJok = false;
                   for ( TextSpan missingSpanKey : ((MultiTextSpan)spanKeyI).getMissingSpans() ) {
-                     if ( (missingSpanKey.getStart() >= spanKeyJ.getStart() && missingSpanKey.getStart() < spanKeyJ.getEnd())
-                           || (missingSpanKey.getEnd() > spanKeyJ.getStart() && missingSpanKey.getEnd() <= spanKeyJ.getEnd()) ) {
+                     if ( (missingSpanKey.getStart() >= spanKeyJ.getStart()
+                           && missingSpanKey.getStart() < spanKeyJ.getEnd())
+                          || (missingSpanKey.getEnd() > spanKeyJ.getStart()
+                              && missingSpanKey.getEnd() <= spanKeyJ.getEnd()) ) {
                         // J overlaps a missing span, so it is actually ok
                         spanJok = true;
                         break;
@@ -121,14 +125,14 @@ final public class PrecisionTermConsumer
             }
          }
       }
-      final Map<TextSpan, Collection<RareWordTerm>> preciseHitMap
-            = new HashMap<TextSpan, Collection<RareWordTerm>>( lookupHitMap.size() - discardSpans.size() );
-      for ( Map.Entry<TextSpan,Collection<RareWordTerm>> entry : lookupHitMap.entrySet() ) {
+      final CollectionMap<TextSpan, Long> preciseHitMap = new HashSetMap<>( textSpans.size() - discardSpans.size() );
+      for ( Map.Entry<TextSpan, Collection<Long>> entry : semanticTerms ) {
          if ( !discardSpans.contains( entry.getKey() ) ) {
-            preciseHitMap.put( entry.getKey(), entry.getValue() );
+            preciseHitMap.addAllValues( entry.getKey(), entry.getValue() );
          }
       }
       return preciseHitMap;
    }
 
+
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/TermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/TermConsumer.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/TermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/TermConsumer.java Wed Sep 10 15:27:24 2014
@@ -18,13 +18,13 @@
  */
 package org.apache.ctakes.dictionary.lookup2.consumer;
 
+import org.apache.ctakes.dictionary.lookup2.concept.Concept;
 import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
-import org.apache.ctakes.dictionary.lookup2.term.SpannedRareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 
-import java.util.Collection;
-
 /**
  * Stores terms in the cas
  * Author: SPF
@@ -33,13 +33,19 @@ import java.util.Collection;
  */
 public interface TermConsumer {
 
+
    /**
-    *
-    * @param jcas -
-    * @param dictionary the dictionary: Anatomical Site, Disease/Disorder, Drug, combination, etc.
-    * @param dictionaryTerms collection of discovered terms
+    * @param jcas            -
+    * @param dictionary      the dictionary: Anatomical Site, Disease/Disorder, Drug, combination, etc.
+    * @param textSpanCuis    map of text spans to the cuis of terms discovered in them
+    * @param cuiConcepts     map of cuis to concepts
     * @throws AnalysisEngineProcessException
     */
-   void consumeHits( JCas jcas, RareWordDictionary dictionary, Collection<SpannedRareWordTerm> dictionaryTerms )
+   void consumeHits( final JCas jcas,
+                     final RareWordDictionary dictionary,
+                     final CollectionMap<TextSpan, Long> textSpanCuis,
+                     final CollectionMap<Long, Concept> cuiConcepts )
          throws AnalysisEngineProcessException;
+
+
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/WsdTermConsumer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/WsdTermConsumer.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/WsdTermConsumer.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/consumer/WsdTermConsumer.java Wed Sep 10 15:27:24 2014
@@ -1,283 +1,283 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.dictionary.lookup2.consumer;
-
-import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
-import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
-import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
-import org.apache.ctakes.dictionary.lookup2.term.SpannedRareWordTerm;
-import org.apache.ctakes.dictionary.lookup2.util.SemanticUtil;
-import org.apache.ctakes.typesystem.type.constants.CONST;
-import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSArray;
-
-import java.sql.Connection;
-import java.sql.Driver;
-import java.sql.DriverManager;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
-/**
- * Author: SPF
- * Affiliation: CHIP-NLP
- * Date: 12/16/13
- */
-public class WsdTermConsumer extends AbstractTermConsumer {
-
-   static private final String JDBC_DRIVER = "org.hsqldb.jdbcDriver";
-   static private final String DB_URL = "jdbc:hsqldb:res:resources/org/apache/ctakes/dictionary/lookup/cuiRelations/cuiRelations";
-   static private final String DB_USER = "sa";
-   static private final String DB_PASS = "";
-   static private final String DB_TABLE = "cuiRelations";
-   final private Connection _connection;
-   private PreparedStatement _metadataStatement;
-
-   public WsdTermConsumer( final UimaContext uimaContext, final Properties properties ) {
-      super( uimaContext, properties );
-      _connection = createDatabaseConnection();
-   }
-
-   protected void consumeTypeIdHits( final JCas jcas, final String codingScheme, final int typeId,
-                                              final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap )
-         throws AnalysisEngineProcessException {
-      // Do nothing
-   }
-
-   static private void registerDriver() {
-      try {
-         Driver driver = (Driver)Class.forName( JDBC_DRIVER ).newInstance();
-         DriverManager.registerDriver( driver );
-      } catch ( Exception e ) {
-         // TODO At least four different exceptions are thrown here, and should be caught and handled individually
-         System.err.println( "Could not register Driver " + JDBC_DRIVER );
-         System.err.println( e.getMessage() );
-         System.exit( 1 );
-      }
-   }
-
-   static public Connection createDatabaseConnection() {
-      registerDriver();
-      Connection connection = null;
-      try {
-         connection = DriverManager.getConnection( DB_URL, DB_USER, DB_PASS );
-      } catch ( SQLException sqlE ) {
-         // thrown by Connection.prepareStatement(..) and getTotalRowCount(..)
-         System.err.println( "Could not establish connection to " + DB_URL + " as " + DB_USER );
-         System.err.println( sqlE.getMessage() );
-         System.exit( 1 );
-      }
-      return connection;
-   }
-
-   /**
-    *
-    * @param cui text of the rare word to use for term lookup
-    * @return an sql call to use for term lookup
-    * @throws SQLException if the {@code PreparedStatement} could not be created or changed
-    */
-   private PreparedStatement initMetaDataStatement( final String cui ) throws SQLException {
-      if ( _metadataStatement == null ) {
-         final String lookupSql = "SELECT * FROM " + DB_TABLE + " WHERE CUI = ?";
-         _metadataStatement = _connection.prepareStatement( lookupSql );
-      }
-      _metadataStatement.clearParameters();
-      _metadataStatement.setString( 1, cui );
-      return _metadataStatement;
-   }
-
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public void consumeHits( final JCas jcas, final RareWordDictionary dictionary,
-                            final Collection<SpannedRareWordTerm> dictionaryTerms )
-         throws AnalysisEngineProcessException {
-      final String codingScheme = getCodingScheme();
-      final String entityType = dictionary.getSemanticGroup();
-      // cTakes IdentifiedAnnotation only accepts an integer as a typeId.
-      final int typeId = SemanticUtil.getSemanticGroupId( entityType );
-      // iterate over the LookupHit objects
-      final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap = createLookupHitMap( dictionaryTerms );
-      // Set of Cuis to avoid duplicates at this offset
-      final Set<String> cuiSet = new HashSet<String>();
-      // Collection of UmlsConcept objects
-      final Collection<UmlsConcept> conceptList = new ArrayList<UmlsConcept>();
-      try {
-         for ( Map.Entry<TextSpan, Collection<RareWordTerm>> entry : lookupHitMap.entrySet() ) {
-            cuiSet.clear();
-            conceptList.clear();
-            final Collection<RareWordTerm> bestTerms = getBestRareWordTerms( entry.getValue(), dictionaryTerms );
-            for ( RareWordTerm lookupHit : bestTerms ) {
-               final String cui = lookupHit.getCui() ;
-               //String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
-               if ( cuiSet.add( cui ) ) {
-                  final UmlsConcept concept = new UmlsConcept( jcas );
-                  concept.setCodingScheme( codingScheme );
-                  concept.setCui( cui );
-                  concept.setTui( lookupHit.getTui() );
-                  conceptList.add( concept );
-               }
-            }
-            // Skip updating CAS if all Concepts for this type were filtered out for this span.
-            if ( conceptList.isEmpty() ) {
-               continue;
-            }
-            // code is only valid if the covered text is also present in the filter
-            final int neBegin = entry.getKey().getStart();
-            final int neEnd = entry.getKey().getEnd();
-            final FSArray conceptArr = new FSArray( jcas, conceptList.size() );
-            int arrIdx = 0;
-            for ( UmlsConcept umlsConcept : conceptList ) {
-               conceptArr.set( arrIdx, umlsConcept );
-               arrIdx++;
-            }
-            final IdentifiedAnnotation identifiedAnnotation = new EntityMention( jcas );
-            identifiedAnnotation.setTypeID( typeId );
-            identifiedAnnotation.setBegin( neBegin );
-            identifiedAnnotation.setEnd( neEnd );
-            identifiedAnnotation.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
-            identifiedAnnotation.setOntologyConceptArr( conceptArr );
-            //            identifiedAnnotation.setConfidence( 0.1 );
-            identifiedAnnotation.addToIndexes();
-         }
-      } catch ( Exception e ) {
-         // TODO Poor form - refactor
-         throw new AnalysisEngineProcessException( e );
-      }
-   }
-
-
-   private Collection<RareWordTerm> getBestRareWordTerms( final Collection<RareWordTerm> spanTerms,
-                                                            final Collection<SpannedRareWordTerm> dictionaryTerms ) {
-      if ( spanTerms.size() <= 1 ) {
-         return spanTerms;
-      }
-      final Map<RareWordTerm, Integer> termValidityMap = new HashMap<RareWordTerm, Integer>( spanTerms.size() );
-      int highestValidity = 0;
-      for ( RareWordTerm term : spanTerms ) {
-         final int validity = getValidityByRelation( term, dictionaryTerms );
-         highestValidity = Math.max( highestValidity, validity );
-         termValidityMap.put( term, validity );
-      }
-      // Anything that is a synonym or above should be valid, or highest validity
-      highestValidity = Math.min( highestValidity, RelationType.SY.__relationStrength );
-      final Collection<RareWordTerm> bestTerms = new ArrayList<RareWordTerm>();
-      for ( Map.Entry<RareWordTerm,Integer> entry : termValidityMap.entrySet() ) {
-         if ( entry.getValue() == highestValidity ) {
-            bestTerms.add( entry.getKey() );
-         }
-      }
-      return bestTerms;
-   }
-
-
-   private int getValidityByRelation( final RareWordTerm term,
-                                             final Collection<SpannedRareWordTerm> dictionaryTerms ) {
-      final Collection<RelatedCui> relatedCuis = getRelatedCuis( term.getCui() );
-      int validity = 0;
-      for ( RelatedCui relatedCui : relatedCuis ) {
-         if ( haveCui( relatedCui.__cui, dictionaryTerms ) ) {
-            validity += relatedCui.__relationType.__relationStrength;
-         }
-      }
-      return validity;
-   }
-
-   private Collection<RelatedCui> getRelatedCuis( final String cui ) {
-      final List<RelatedCui> relatedCuis = new ArrayList<RelatedCui>();
-      try {
-         initMetaDataStatement( cui );
-         final ResultSet resultSet = _metadataStatement.executeQuery();
-         while ( resultSet.next() ) {
-            final RelatedCui relatedCui = new RelatedCui( resultSet.getString( FIELD_INDEX.CUI.__index),
-                                                          resultSet.getString( FIELD_INDEX.RELATION_TYPE.__index ) );
-            relatedCuis.add( relatedCui );
-         }
-         // Though the ResultSet interface documentation states that there are automatic closures,
-         // it is up to the driver to implement this behavior ...  historically some drivers have not done so
-         resultSet.close();
-         return relatedCuis;
-      } catch ( SQLException e ) {
-//         throw new DictionaryException( e );
-      }
-      return relatedCuis;
-   }
-
-   static private boolean haveCui( final String cui, final Collection<SpannedRareWordTerm> dictionaryTerms ) {
-      for ( SpannedRareWordTerm term : dictionaryTerms ) {
-         if ( term.getRareWordTerm().getCui().equals( cui ) ) {
-            return true;
-         }
-      }
-      return false;
-   }
-
-   static public enum RelationType {
-      // RL/SY : Synonym; SIB : Sibling; PAR : Parent; CHD : Child; RN,RB,RO : Narrow, Broad, Other; XR : No Relation
-      RL(9), SY(9), SIB(7), PAR(7), CHD(7), RN(8), RB(8), RO(5), XR(-5), UNKNOWN(0);
-      private final int __relationStrength;
-      private RelationType( final int relationStrength ) {
-         __relationStrength = relationStrength;
-      }
-      static private RelationType getRelationType( final String relationName ) {
-         for ( RelationType type : RelationType.values() ) {
-            if ( relationName.equals( type.name() ) ) {
-               return type;
-            }
-         }
-         return UNKNOWN;
-      }
-   }
-
-   static public class RelatedCui {
-      final private String __cui;
-      final private RelationType __relationType;
-      public RelatedCui( final String cui, final String relationName ) {
-         __cui = cui;
-         __relationType = RelationType.getRelationType( relationName );
-      }
-   }
-
-   /**
-    * Column (field) indices in the database.  Notice that these are constant and not configurable.
-    * If a configurable implementation is desired then create an extension.
-    */
-   static private enum FIELD_INDEX {
-      CUI( 1 ), RELATION_TYPE( 2 );
-      final private int __index;
-      private FIELD_INDEX( final int index ) {
-         __index = index;
-      }
-   }
-
-}
+///**
+// * Licensed to the Apache Software Foundation (ASF) under one
+// * or more contributor license agreements.  See the NOTICE file
+// * distributed with this work for additional information
+// * regarding copyright ownership.  The ASF licenses this file
+// * to you under the Apache License, Version 2.0 (the
+// * "License"); you may not use this file except in compliance
+// * with the License.  You may obtain a copy of the License at
+// *
+// *   http://www.apache.org/licenses/LICENSE-2.0
+// *
+// * Unless required by applicable law or agreed to in writing,
+// * software distributed under the License is distributed on an
+// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// * KIND, either express or implied.  See the License for the
+// * specific language governing permissions and limitations
+// * under the License.
+// */
+//package org.apache.ctakes.dictionary.lookup2.consumer;
+//
+//import org.apache.ctakes.dictionary.lookup2.textspan.TextSpan;
+//import org.apache.ctakes.dictionary.lookup2.dictionary.RareWordDictionary;
+//import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+//import org.apache.ctakes.dictionary.lookup2.term.SpannedRareWordTerm;
+//import org.apache.ctakes.dictionary.lookup2.util.SemanticUtil;
+//import org.apache.ctakes.typesystem.type.constants.CONST;
+//import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+//import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+//import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+//import org.apache.uima.UimaContext;
+//import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+//import org.apache.uima.jcas.JCas;
+//import org.apache.uima.jcas.cas.FSArray;
+//
+//import java.sql.Connection;
+//import java.sql.Driver;
+//import java.sql.DriverManager;
+//import java.sql.PreparedStatement;
+//import java.sql.ResultSet;
+//import java.sql.SQLException;
+//import java.util.ArrayList;
+//import java.util.Collection;
+//import java.util.HashMap;
+//import java.util.HashSet;
+//import java.util.List;
+//import java.util.Map;
+//import java.util.Properties;
+//import java.util.Set;
+//
+///**
+// * Author: SPF
+// * Affiliation: CHIP-NLP
+// * Date: 12/16/13
+// */
+//public class WsdTermConsumer extends AbstractTermConsumer {
+//
+//   static private final String JDBC_DRIVER = "org.hsqldb.jdbcDriver";
+//   static private final String DB_URL = "jdbc:hsqldb:res:resources/org/apache/ctakes/dictionary/lookup/cuiRelations/cuiRelations";
+//   static private final String DB_USER = "sa";
+//   static private final String DB_PASS = "";
+//   static private final String DB_TABLE = "cuiRelations";
+//   final private Connection _connection;
+//   private PreparedStatement _metadataStatement;
+//
+//   public WsdTermConsumer( final UimaContext uimaContext, final Properties properties ) {
+//      super( uimaContext, properties );
+//      _connection = createDatabaseConnection();
+//   }
+//
+//   protected void consumeTypeIdHits( final JCas jcas, final String codingScheme, final int typeId,
+//                                              final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap )
+//         throws AnalysisEngineProcessException {
+//      // Do nothing
+//   }
+//
+//   static private void registerDriver() {
+//      try {
+//         Driver driver = (Driver)Class.forName( JDBC_DRIVER ).newInstance();
+//         DriverManager.registerDriver( driver );
+//      } catch ( Exception e ) {
+//         // TODO At least four different exceptions are thrown here, and should be caught and handled individually
+//         System.err.println( "Could not register Driver " + JDBC_DRIVER );
+//         System.err.println( e.getMessage() );
+//         System.exit( 1 );
+//      }
+//   }
+//
+//   static public Connection createDatabaseConnection() {
+//      registerDriver();
+//      Connection connection = null;
+//      try {
+//         connection = DriverManager.getConnection( DB_URL, DB_USER, DB_PASS );
+//      } catch ( SQLException sqlE ) {
+//         // thrown by DriverManager.getConnection(..)
+//         System.err.println( "Could not establish connection to " + DB_URL + " as " + DB_USER );
+//         System.err.println( sqlE.getMessage() );
+//         System.exit( 1 );
+//      }
+//      return connection;
+//   }
+//
+//   /**
+//    *
+//    * @param cui text of the rare word to use for term lookup
+//    * @return an sql call to use for term lookup
+//    * @throws SQLException if the {@code PreparedStatement} could not be created or changed
+//    */
+//   private PreparedStatement initMetaDataStatement( final String cui ) throws SQLException {
+//      if ( _metadataStatement == null ) {
+//         final String lookupSql = "SELECT * FROM " + DB_TABLE + " WHERE CUI = ?";
+//         _metadataStatement = _connection.prepareStatement( lookupSql );
+//      }
+//      _metadataStatement.clearParameters();
+//      _metadataStatement.setString( 1, cui );
+//      return _metadataStatement;
+//   }
+//
+//   /**
+//    * {@inheritDoc}
+//    */
+//   @Override
+//   public void consumeHits( final JCas jcas, final RareWordDictionary dictionary,
+//                            final Collection<SpannedRareWordTerm> dictionaryTerms )
+//         throws AnalysisEngineProcessException {
+//      final String codingScheme = getCodingScheme();
+//      final String entityType = dictionary.getSemanticGroup();
+//      // cTakes IdentifiedAnnotation only accepts an integer as a typeId.
+//      final int typeId = SemanticUtil.getSemanticGroupId( entityType );
+//      // iterate over the LookupHit objects
+//      final Map<TextSpan, Collection<RareWordTerm>> lookupHitMap = createLookupHitMap( dictionaryTerms );
+//      // Set of Cuis to avoid duplicates at this offset
+//      final Set<String> cuiSet = new HashSet<String>();
+//      // Collection of UmlsConcept objects
+//      final Collection<UmlsConcept> conceptList = new ArrayList<UmlsConcept>();
+//      try {
+//         for ( Map.Entry<TextSpan, Collection<RareWordTerm>> entry : lookupHitMap.entrySet() ) {
+//            cuiSet.clearCollection();
+//            conceptList.clearCollection();
+//            final Collection<RareWordTerm> bestTerms = getBestRareWordTerms( entry.getValue(), dictionaryTerms );
+//            for ( RareWordTerm lookupHit : bestTerms ) {
+//               final String cui = lookupHit.getCuiCode() ;
+//               //String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
+//               if ( cuiSet.add( cui ) ) {
+//                  final UmlsConcept concept = new UmlsConcept( jcas );
+//                  concept.setCodingScheme( codingScheme );
+//                  concept.setCui( cui );
+//                  concept.setTui( lookupHit.getTui() );
+//                  conceptList.add( concept );
+//               }
+//            }
+//            // Skip updating CAS if all Concepts for this type were filtered out for this span.
+//            if ( conceptList.isEmpty() ) {
+//               continue;
+//            }
+//            // code is only valid if the covered text is also present in the filter
+//            final int neBegin = entry.getKey().getStart();
+//            final int neEnd = entry.getKey().getEnd();
+//            final FSArray conceptArr = new FSArray( jcas, conceptList.size() );
+//            int arrIdx = 0;
+//            for ( UmlsConcept umlsConcept : conceptList ) {
+//               conceptArr.set( arrIdx, umlsConcept );
+//               arrIdx++;
+//            }
+//            final IdentifiedAnnotation identifiedAnnotation = new EntityMention( jcas );
+//            identifiedAnnotation.setTypeID( typeId );
+//            identifiedAnnotation.setBegin( neBegin );
+//            identifiedAnnotation.setEnd( neEnd );
+//            identifiedAnnotation.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
+//            identifiedAnnotation.setOntologyConceptArr( conceptArr );
+//            //            identifiedAnnotation.setConfidence( 0.1 );
+//            identifiedAnnotation.addToIndexes();
+//         }
+//      } catch ( Exception e ) {
+//         // TODO Poor form - refactor
+//         throw new AnalysisEngineProcessException( e );
+//      }
+//   }
+//
+//
+//   private Collection<RareWordTerm> getBestRareWordTerms( final Collection<RareWordTerm> spanTerms,
+//                                                            final Collection<SpannedRareWordTerm> dictionaryTerms ) {
+//      if ( spanTerms.size() <= 1 ) {
+//         return spanTerms;
+//      }
+//      final Map<RareWordTerm, Integer> termValidityMap = new HashMap<RareWordTerm, Integer>( spanTerms.size() );
+//      int highestValidity = 0;
+//      for ( RareWordTerm term : spanTerms ) {
+//         final int validity = getValidityByRelation( term, dictionaryTerms );
+//         highestValidity = Math.max( highestValidity, validity );
+//         termValidityMap.put( term, validity );
+//      }
+//      // Anything that is a synonym or above should be valid, or highest validity
+//      highestValidity = Math.min( highestValidity, RelationType.SY.__relationStrength );
+//      final Collection<RareWordTerm> bestTerms = new ArrayList<RareWordTerm>();
+//      for ( Map.Entry<RareWordTerm,Integer> entry : termValidityMap.entrySet() ) {
+//         if ( entry.getValue() == highestValidity ) {
+//            bestTerms.add( entry.getKey() );
+//         }
+//      }
+//      return bestTerms;
+//   }
+//
+//
+//   private int getValidityByRelation( final RareWordTerm term,
+//                                             final Collection<SpannedRareWordTerm> dictionaryTerms ) {
+//      final Collection<RelatedCui> relatedCuis = getRelatedCuis( term.getCuiCode() );
+//      int validity = 0;
+//      for ( RelatedCui relatedCui : relatedCuis ) {
+//         if ( haveCui( relatedCui.__cui, dictionaryTerms ) ) {
+//            validity += relatedCui.__relationType.__relationStrength;
+//         }
+//      }
+//      return validity;
+//   }
+//
+//   private Collection<RelatedCui> getRelatedCuis( final String cui ) {
+//      final List<RelatedCui> relatedCuis = new ArrayList<RelatedCui>();
+//      try {
+//         initMetaDataStatement( cui );
+//         final ResultSet resultSet = _metadataStatement.executeQuery();
+//         while ( resultSet.next() ) {
+//            final RelatedCui relatedCui = new RelatedCui( resultSet.getString( FIELD_INDEX.CUI.__index),
+//                                                          resultSet.getString( FIELD_INDEX.RELATION_TYPE.__index ) );
+//            relatedCuis.add( relatedCui );
+//         }
+//         // Though the ResultSet interface documentation states that there are automatic closures,
+//         // it is up to the driver to implement this behavior ...  historically some drivers have not done so
+//         resultSet.close();
+//         return relatedCuis;
+//      } catch ( SQLException e ) {
+////         throw new DictionaryException( e );
+//      }
+//      return relatedCuis;
+//   }
+//
+//   static private boolean haveCui( final String cui, final Collection<SpannedRareWordTerm> dictionaryTerms ) {
+//      for ( SpannedRareWordTerm term : dictionaryTerms ) {
+//         if ( term.getRareWordTerm().getCuiCode().equals( cui ) ) {
+//            return true;
+//         }
+//      }
+//      return false;
+//   }
+//
+//   static public enum RelationType {
+//      // RL/SY : Synonym; SIB : Sibling; PAR : Parent; CHD : Child; RN,RB,RO : Narrow, Broad, Other; XR : No Relation
+//      RL(9), SY(9), SIB(7), PAR(7), CHD(7), RN(8), RB(8), RO(5), XR(-5), UNKNOWN(0);
+//      private final int __relationStrength;
+//      private RelationType( final int relationStrength ) {
+//         __relationStrength = relationStrength;
+//      }
+//      static private RelationType getRelationType( final String relationName ) {
+//         for ( RelationType type : RelationType.values() ) {
+//            if ( relationName.equals( type.name() ) ) {
+//               return type;
+//            }
+//         }
+//         return UNKNOWN;
+//      }
+//   }
+//
+//   static public class RelatedCui {
+//      final private String __cui;
+//      final private RelationType __relationType;
+//      public RelatedCui( final String cui, final String relationName ) {
+//         __cui = cui;
+//         __relationType = RelationType.getRelationType( relationName );
+//      }
+//   }
+//
+//   /**
+//    * Column (field) indices in the database.  Notice that these are constant and not configurable.
+//    * If a configurable implementation is desired then create an extension.
+//    */
+//   static private enum FIELD_INDEX {
+//      CUI( 1 ), RELATION_TYPE( 2 );
+//      final private int __index;
+//      private FIELD_INDEX( final int index ) {
+//         __index = index;
+//      }
+//   }
+//
+//}

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/AbstractRareWordDictionary.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/AbstractRareWordDictionary.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/AbstractRareWordDictionary.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/AbstractRareWordDictionary.java Wed Sep 10 15:27:24 2014
@@ -19,14 +19,13 @@
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
 
-import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
 import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
 
 import java.util.ArrayList;
 import java.util.Collection;
 
 /**
- *
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 11/25/13
@@ -34,16 +33,13 @@ import java.util.Collection;
 abstract public class AbstractRareWordDictionary implements RareWordDictionary {
 
    final private String _name;
-   final private String _semanticGroup;
+
 
    /**
-    *
     * @param name simple name for the dictionary
-    * @param semanticGroup the type of term that exists in the dictionary: Anatomical Site, Disease/Disorder, Drug, etc.
     */
-   public AbstractRareWordDictionary( final String name, final String semanticGroup ) {
+   public AbstractRareWordDictionary( final String name ) {
       _name = name;
-      _semanticGroup = semanticGroup;
    }
 
    /**
@@ -58,14 +54,6 @@ abstract public class AbstractRareWordDi
     * {@inheritDoc}
     */
    @Override
-   public String getSemanticGroup() {
-      return _semanticGroup;
-   }
-
-   /**
-    * {@inheritDoc}
-    */
-   @Override
    public Collection<RareWordTerm> getRareWordHits( final FastLookupToken fastLookupToken ) {
       if ( fastLookupToken.getVariant() == null ) {
          return getRareWordHits( fastLookupToken.getText() );

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/BsvRareWordDictionary.java Wed Sep 10 15:27:24 2014
@@ -18,21 +18,20 @@
  */
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
-import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
 import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.FastLookupToken;
 import org.apache.ctakes.dictionary.lookup2.util.LookupUtil;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
 import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.FileReader;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Map;
-
-import static org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTuiTerm;
+import java.util.Properties;
 
 /**
  * A RareWordDictionary created from a bar-separated value (BSV) file.  The file can have 2 or 3 columns,
@@ -47,33 +46,33 @@ final public class BsvRareWordDictionary
 
    static private final Logger LOGGER = Logger.getLogger( "BsvRareWordDictionary" );
 
+   static private final String BSV_FILE_PATH = "bsvPath";
+
    private RareWordDictionary _delegateDictionary;
 
-   public BsvRareWordDictionary( final String name, final String entityId, final String bsvFilePath ) {
-      this( name, entityId, new File( bsvFilePath ) );
+
+   public BsvRareWordDictionary( final String name, final UimaContext uimaContext, final Properties properties ) {
+      this( name, properties.getProperty( BSV_FILE_PATH ) );
    }
 
-   public BsvRareWordDictionary( final String name, final String entityId, final File bsvFile ) {
-      final Collection<CuiTuiTerm> cuiTuiTerms = parseBsvFile( bsvFile, entityId );
-      final Map<String,Collection<RareWordTerm>> rareWordTermMap
-            = RareWordTermMapCreator.createRareWordTermMap( cuiTuiTerms );
-      _delegateDictionary = new MemRareWordDictionary( name, entityId, rareWordTermMap );
+
+   public BsvRareWordDictionary( final String name, final String bsvFilePath ) {
+      this( name, new File( bsvFilePath ) );
    }
 
-   /**
-    * {@inheritDoc}
-    */
-   @Override
-   public String getName() {
-      return _delegateDictionary.getName();
+   public BsvRareWordDictionary( final String name, final File bsvFile ) {
+      final Collection<RareWordTermMapCreator.CuiTerm> cuiTerms = parseBsvFile( bsvFile );
+      final CollectionMap<String, RareWordTerm> rareWordTermMap
+            = RareWordTermMapCreator.createRareWordTermMap( cuiTerms );
+      _delegateDictionary = new MemRareWordDictionary( name, rareWordTermMap );
    }
 
    /**
     * {@inheritDoc}
     */
    @Override
-   public String getSemanticGroup() {
-      return _delegateDictionary.getSemanticGroup();
+   public String getName() {
+      return _delegateDictionary.getName();
    }
 
    /**
@@ -94,68 +93,72 @@ final public class BsvRareWordDictionary
 
 
    /**
-    * Create a collection of {@link RareWordTermMapCreator.CuiTuiTerm} Objects
+    * Create a collection of {@link org.apache.ctakes.dictionary.lookup2.dictionary.RareWordTermMapCreator.CuiTerm} Objects
     * by parsing a bsv file.  The file can be in one of two columnar formats:
     * <p>
-    *    CUI|Text
+    * CUI|Text
     * </p>
     * or
     * <p>
-    *    CUI|TUI|Text
+    * CUI|TUI|Text
+    * </p>
+    * or
+    * <p>
+    * CUI|TUI|Text|PreferredTerm
     * </p>
     * If the TUI column is omitted then the entityId for the dictionary is used as the TUI
+    *
     * @param bsvFile file containing term rows and bsv columns
-    * @param entityId the entity id for the dictionary
     * @return collection of all valid terms read from the bsv file
     */
-   static private Collection<CuiTuiTerm> parseBsvFile( final File bsvFile, final String entityId ) {
-      final Collection<CuiTuiTerm> cuiTuiTerms = new ArrayList<CuiTuiTerm>();
+   static private Collection<RareWordTermMapCreator.CuiTerm> parseBsvFile( final File bsvFile ) {
+      final Collection<RareWordTermMapCreator.CuiTerm> cuiTerms = new ArrayList<>();
       try {
          final BufferedReader reader = new BufferedReader( new FileReader( bsvFile ) );
          String line = reader.readLine();
          while ( line != null ) {
+            if ( line.startsWith( "//" ) || line.startsWith( "#" ) ) {
+               // comment line: advance the reader before skipping, or the loop never terminates
+               line = reader.readLine();
+               continue;
+            }
             final String[] columns = LookupUtil.fastSplit( line, '|' );
-            final CuiTuiTerm cuiTuiTerm = createCuiTuiTerm( columns, entityId );
-            if ( cuiTuiTerm != null ) {
+            final RareWordTermMapCreator.CuiTerm cuiTerm = createCuiTuiTerm( columns );
+            if ( cuiTerm != null ) {
                // Add to the dictionary
-               cuiTuiTerms.add( cuiTuiTerm );
+               cuiTerms.add( cuiTerm );
             } else {
                LOGGER.warn( "Bad BSV line " + line + " in " + bsvFile.getPath() );
             }
             line = reader.readLine();
          }
          reader.close();
-      } catch ( FileNotFoundException fnfE ) {
-         LOGGER.error( fnfE.getMessage() );
       } catch ( IOException ioE ) {
          LOGGER.error( ioE.getMessage() );
       }
-      return cuiTuiTerms;
+      return cuiTerms;
    }
 
    /**
     * @param columns two or three columns representing CUI,Text or CUI,TUI,Text respectively
-    * @param entityId the entity id for the dictionary, used as the Term TUI should one not be specified
     * @return a term created from the columns or null if the columns are malformed
     */
-   static private CuiTuiTerm createCuiTuiTerm( final String[] columns, final String entityId ) {
-      if ( columns.length != 2 && columns.length != 3 ) {
+   static private RareWordTermMapCreator.CuiTerm createCuiTuiTerm( final String[] columns ) {
+      if ( columns.length < 2 ) {
          return null;
       }
       final int cuiIndex = 0;
-      int tuiIndex = -1;
       int termIndex = 1;
-      if ( columns.length == 3 ) {
-         tuiIndex = 1;
+      if ( columns.length >= 3 ) {
+         // second column is a tui, so text is in the third column
          termIndex = 2;
       }
-      if ( columns[ cuiIndex ].trim().isEmpty() || columns[ termIndex ].trim().isEmpty() ) {
+      if ( columns[cuiIndex].trim().isEmpty() || columns[termIndex].trim().isEmpty() ) {
          return null;
       }
-      final String cui = columns[ cuiIndex ].trim();
-      final String tui = (tuiIndex < 0 || columns[tuiIndex].trim().isEmpty()) ? entityId : columns[ tuiIndex ].trim();
-      final String term = columns[ termIndex ].trim().toLowerCase();
-      return new CuiTuiTerm( cui, tui, term );
+      final String cui = columns[cuiIndex].trim();
+      final String term = columns[termIndex].trim().toLowerCase();
+      return new RareWordTermMapCreator.CuiTerm( cui, term );
    }
 
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryDescriptorParser.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryDescriptorParser.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryDescriptorParser.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryDescriptorParser.java Wed Sep 10 15:27:24 2014
@@ -18,14 +18,12 @@
  */
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
+import org.apache.ctakes.dictionary.lookup2.concept.ConceptFactory;
 import org.apache.ctakes.dictionary.lookup2.consumer.TermConsumer;
 import org.apache.ctakes.dictionary.lookup2.util.DictionarySpec;
-import org.apache.ctakes.dictionary.lookup2.util.UmlsUserApprover;
 import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
 import org.jdom.Document;
 import org.jdom.Element;
 import org.jdom.JDOMException;
@@ -35,14 +33,10 @@ import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Properties;
+import java.util.*;
 
 /**
- * Parses the XML descriptor indicated by the {@code externalResource} for {@code RareWordTermsDescriptorFile}
+ * Parses the XML descriptor indicated by the {@code externalResource} for {@code DictionaryDescriptorFile}
  * in the XML descriptor for the Rare Word Term Lookup Annotator
  * {@link org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator}
  * </p>
@@ -71,9 +65,13 @@ final public class DictionaryDescriptorP
    }
 
    /**
-    * XML key specifying the section that defines each {@link RareWordDictionary} that should be used for annotation
+    * XML keys specifying the main sections that define dictionaries, concept factories, and the pairing of the two
     */
-   static private final String DICTIONARIES_KEY = "rareWordDictionaries";
+   static private final String DICTIONARIES_KEY = "dictionaries";
+   static private final String CONCEPT_FACTORIES_KEY = "conceptFactories";
+   static private final String PAIRS_KEY = "dictionaryConceptPairs";
+
+
    /**
     * Each {@link RareWordDictionary} should have an id that specifies a unique name for that dictionary
     */
@@ -130,6 +128,14 @@ final public class DictionaryDescriptorP
     * </ul>
     */
    private static final String IMPLEMENTATION = "implementation";
+
+   /**
+    * XML key specifying the section that defines the single
+    * {@link org.apache.ctakes.dictionary.lookup2.concept.ConceptFactory} that should be used to create concepts for discovered terms.
+    */
+   static private final String CONCEPTS_KEY = "conceptFactory";
+
+
    /**
     * XML key specifying the section that defines the single {@link org.apache.ctakes.dictionary.lookup2.consumer.TermConsumer} that should be used to
     * consume discovered terms.
@@ -147,7 +153,7 @@ final public class DictionaryDescriptorP
     *                       for an example
     * @param uimaContext    -
     * @return {@link org.apache.ctakes.dictionary.lookup2.util.DictionarySpec} with specification of dictionaries and a consumer as read from the
-    *         {@code descriptorFile}
+    * {@code descriptorFile}
     * @throws AnnotatorContextException if the File could not be found/read or the xml could not be parsed
     */
    static public DictionarySpec parseDescriptor( final File descriptorFile, final UimaContext uimaContext )
@@ -157,16 +163,20 @@ final public class DictionaryDescriptorP
       Document doc;
       try {
          doc = saxBuilder.build( descriptorFile );
-      } catch ( JDOMException jdomE ) {
-         throw new AnnotatorContextException( "Could not parse " + descriptorFile.getPath(), new Object[0], jdomE );
-      } catch ( IOException ioE ) {
-         throw new AnnotatorContextException( "Could not parse " + descriptorFile.getPath(), new Object[0], ioE );
+      } catch ( JDOMException | IOException jdomioE ) {
+         throw new AnnotatorContextException( "Could not parse " + descriptorFile.getPath(), new Object[0], jdomioE );
       }
       final Map<String, RareWordDictionary> dictionaries
             = parseDictionaries( uimaContext, doc.getRootElement().getChild( DICTIONARIES_KEY ) );
-      final TermConsumer consumer = parseConsumerXml( uimaContext,
-                                                              doc.getRootElement().getChild( CONSUMER_KEY ) );
-      return new DictionarySpec( dictionaries.values(), consumer );
+      final Map<String, ConceptFactory> conceptFactories
+            = parseConceptFactories( uimaContext, doc.getRootElement().getChild( CONCEPT_FACTORIES_KEY ) );
+      final Map<String, String> pairDictionaryNames
+            = parsePairingNames( doc.getRootElement().getChild( PAIRS_KEY ), "dictionaryName" );
+      final Map<String, String> pairConceptFactoryNames
+            = parsePairingNames( doc.getRootElement().getChild( PAIRS_KEY ), "conceptFactoryName" );
+      final TermConsumer consumer = parseConsumerXml( uimaContext, doc.getRootElement().getChild( CONSUMER_KEY ) );
+      return new DictionarySpec( pairDictionaryNames, pairConceptFactoryNames, dictionaries, conceptFactories,
+            consumer );
    }
 
    /**
@@ -174,25 +184,27 @@ final public class DictionaryDescriptorP
     *
     * @param uimaContext         -
     * @param dictionariesElement contains definition of all dictionaries
-    * @return Mapping of dictionary names {@link this.NAME_ID} to new {@link RareWordDictionary} instances
+    * @return Mapping of dictionary names to new {@link RareWordDictionary} instances
     * @throws AnnotatorContextException if the resource specified by {@link this.EXTERNAL_RESOURCE} does not match
     *                                   the type specified by {@link this.IMPLEMENTATION} or for some reason could not be used
     */
    static private Map<String, RareWordDictionary> parseDictionaries( final UimaContext uimaContext,
-                                                                           final Element dictionariesElement )
+                                                                     final Element dictionariesElement )
          throws AnnotatorContextException {
-      final Map<String, RareWordDictionary> engines = new HashMap<String, RareWordDictionary>();
-      final Collection dictatteers = dictionariesElement.getChildren();
-      for ( Object dictatteer : dictatteers ) {
-         if ( dictatteer instanceof Element ) {
-            final String id = ((Element) dictatteer).getAttributeValue( NAME_ID );
-            final RareWordDictionary dictionary = parseDictionaryXml( uimaContext, (Element) dictatteer );
-            engines.put( id, dictionary );
+      final Map<String, RareWordDictionary> dictionaries = new HashMap<>();
+      final Collection dictionaryElements = dictionariesElement.getChildren();
+      for ( Object dictionaryElement : dictionaryElements ) {
+         if ( dictionaryElement instanceof Element ) {
+            final RareWordDictionary dictionary = parseDictionary( uimaContext, (Element)dictionaryElement );
+            if ( dictionary != null ) {
+               dictionaries.put( dictionary.getName(), dictionary );
+            }
          }
       }
-      return engines;
+      return dictionaries;
    }
 
+
    /**
     * Creates a dictionary by parsing each child element of {@link this.DICTIONARIES_KEY}
     *
@@ -201,77 +213,218 @@ final public class DictionaryDescriptorP
     * @return a dictionary or null if there is a problem
     * @throws AnnotatorContextException if any of a dozen things goes wrong
     */
-   private static RareWordDictionary parseDictionaryXml( final UimaContext uimaContext,
-                                                         final Element dictionaryElement )
+   private static RareWordDictionary parseDictionary( final UimaContext uimaContext, final Element dictionaryElement )
          throws AnnotatorContextException {
-      final String externalResourceKey = dictionaryElement.getAttributeValue( EXTERNAL_RESOURCE );
-      final Boolean keepCase = Boolean.valueOf( dictionaryElement.getAttributeValue( CASE_SENSITIVE ) );
-      final String entityTypeId = dictionaryElement.getAttributeValue( TYPE_ID );
-      Object externalResource;
+      final Class[] constructionArgs = { String.class, UimaContext.class, Properties.class };
+
+      final String name = getName( "Dictionary Name", dictionaryElement );
+      final String className = dictionaryElement.getChildText( "implementationName" );
+      final Element propertiesElement = dictionaryElement.getChild( "properties" );
+      final Properties properties = parsePropertiesXml( propertiesElement );
+      Class dictionaryClass;
       try {
-         externalResource = uimaContext.getResourceObject( externalResourceKey );
-      } catch ( ResourceAccessException raE ) {
-         throw new AnnotatorContextException( "Could not access external resource " + externalResourceKey,
-                                              new Object[0], raE );
-      }
-      if ( externalResource == null ) {
-         throw new AnnotatorContextException( "Could not find external resource " + externalResourceKey,
-                                              new Object[0] );
-      }
-      RareWordDictionary dictionary = null;
-      final Element implementationElement = (Element) dictionaryElement.getChild( IMPLEMENTATION ).getChildren().get( 0 );
-      final String implementationName = implementationElement.getName();
-      if ( implementationName.equals( "rareWordJdbc" ) ) {
-         dictionary = DictionaryFactory.createRareWordJdbc( implementationElement,
-                                                            externalResource,
-                                                            entityTypeId );
-      } else if ( implementationName.equals( "rareWordUmls" ) ) {
+         dictionaryClass = Class.forName( className );
+      } catch ( ClassNotFoundException cnfE ) {
+         throw new AnnotatorContextException( "Unknown class " + className, new Object[0], cnfE );
+      }
+      if ( !RareWordDictionary.class.isAssignableFrom( dictionaryClass ) ) {
+         throw new AnnotatorContextException( className + " is not a Rare Word Dictionary", new Object[0] );
+      }
+      final Constructor[] constructors = dictionaryClass.getConstructors();
+      for ( Constructor constructor : constructors ) {
          try {
-            UmlsUserApprover.validateUMLSUser( uimaContext );
-            dictionary = DictionaryFactory.createRareWordJdbc( implementationElement,
-                                                               externalResource,
-                                                               entityTypeId );
-         } catch ( ResourceInitializationException riE ) {
-            throw new AnnotatorContextException( riE );
+            if ( Arrays.equals( constructionArgs, constructor.getParameterTypes() ) ) {
+               final Object[] args = new Object[]{ name, uimaContext, properties };
+               return (RareWordDictionary)constructor.newInstance( args );
+            }
+         } catch ( InstantiationException | IllegalAccessException | InvocationTargetException iniaitE ) {
+            throw new AnnotatorContextException( "Could not construct " + className, new Object[0], iniaitE );
          }
-      } else if ( implementationName.equals( "rareWordBsv" ) ) {
-         dictionary = DictionaryFactory.createRareWordBsv( externalResourceKey, externalResource, entityTypeId );
-//      } else if ( implementationName.equals( "luceneImpl" ) ) {
-//         dictionary = DictionaryFactory.createWrappedLucene( dictionaryElement,
-//                                                                     externalResourceKey,
-//                                                                     externalResource,
-//                                                                     entityTypeId );
-//      } else if ( implementationName.equals( "jdbcImpl" ) ) {
-//         dictionary = DictionaryFactory.createWrappedJdbc( dictionaryElement,
-//                                                                   implementationElement,
-//                                                                   externalResourceKey,
-//                                                                   externalResource,
-//                                                                   entityTypeId );
-//      } else if ( implementationName.equals( "csvImp" ) ) {
-//         dictionary = DictionaryFactory.createWrappedCsv( dictionaryElement,
-//                                                                  implementationElement,
-//                                                                  externalResourceKey,
-//                                                                  externalResource,
-//                                                                  entityTypeId );
-      } else {
-         throw new AnnotatorContextException( "Unsupported dictionary implementation " + implementationName,
-                                              new Object[0] );
-      }
-      if ( dictionary == null ) {
-         throw new AnnotatorContextException( "No appropriate dictionary defined", new Object[0] );
-      }
-      // Deprecated -
-//      if ( dictionary instanceof Dictionary ) {
-//         final Collection metaFields = dictionaryElement.getChild( "metaFields" ).getChildren();
-//         for ( Object value : metaFields ) {
-//            String metaFieldName = ((Element) value).getAttributeValue( "fieldName" );
-//            ((Dictionary) dictionary).retainMetaData( metaFieldName );
-//         }
-//      }
-      return dictionary;
+      }
+      throw new AnnotatorContextException( "No Constructor for " + className, new Object[0] );
+   }
+
+
+   /**
+    * Creates concept factories by parsing the section defined by {@link this.CONCEPT_FACTORIES_KEY}
+    *
+    * @param uimaContext             -
+    * @param conceptFactoriesElement contains definition of all concept factories
+    * @return Mapping of concept factory names to new {@link ConceptFactory} instances
+    * @throws AnnotatorContextException if the resource specified by {@link this.EXTERNAL_RESOURCE} does not match
+    *                                   the type specified by {@link this.IMPLEMENTATION} or for some reason could not be used
+    */
+   static private Map<String, ConceptFactory> parseConceptFactories( final UimaContext uimaContext,
+                                                                     final Element conceptFactoriesElement )
+         throws AnnotatorContextException {
+      final Map<String, ConceptFactory> conceptFactories = new HashMap<>();
+      final Collection conceptFactoryElements = conceptFactoriesElement.getChildren();
+      for ( Object conceptFactoryElement : conceptFactoryElements ) {
+         if ( conceptFactoryElement instanceof Element ) {
+            final ConceptFactory conceptFactory = parseConceptFactory( uimaContext, (Element)conceptFactoryElement );
+            if ( conceptFactory != null ) {
+               conceptFactories.put( conceptFactory.getName(), conceptFactory );
+            }
+         }
+      }
+      return conceptFactories;
+   }
+
+   /**
+    * Creates a concept factory by parsing a single child element of {@link this.CONCEPT_FACTORIES_KEY}
+    *
+    * @param uimaContext           -
+    * @param conceptFactoryElement contains the definition of a single concept factory
+    * @return a new concept factory; failures are reported by exception rather than a null return
+    * @throws AnnotatorContextException if any of a dozen things goes wrong
+    */
+   private static ConceptFactory parseConceptFactory( final UimaContext uimaContext,
+                                                      final Element conceptFactoryElement )
+         throws AnnotatorContextException {
+      final Class[] constructionArgs = { String.class, UimaContext.class, Properties.class };
+      final String name = getName( "Concept Factory Name", conceptFactoryElement );
+      final String className = conceptFactoryElement.getChildText( "implementationName" );
+      final Element propertiesElement = conceptFactoryElement.getChild( "properties" );
+      final Properties properties = parsePropertiesXml( propertiesElement );
+      Class conceptFactoryClass;
+      try {
+         conceptFactoryClass = Class.forName( className );
+      } catch ( ClassNotFoundException cnfE ) {
+         throw new AnnotatorContextException( "Unknown class " + className, new Object[0], cnfE );
+      }
+      if ( !ConceptFactory.class.isAssignableFrom( conceptFactoryClass ) ) {
+         throw new AnnotatorContextException( className + " is not a Concept Factory", new Object[0] );
+      }
+      final Constructor[] constructors = conceptFactoryClass.getConstructors();
+      for ( Constructor constructor : constructors ) {
+         try {
+            if ( Arrays.equals( constructionArgs, constructor.getParameterTypes() ) ) {
+               final Object[] args = new Object[]{ name, uimaContext, properties };
+               return (ConceptFactory)constructor.newInstance( args );
+            }
+         } catch ( InstantiationException | IllegalAccessException | InvocationTargetException iniaitE ) {
+            throw new AnnotatorContextException( "Could not construct " + className, new Object[0], iniaitE );
+         }
+      }
+      throw new AnnotatorContextException( "No Constructor for " + className, new Object[0] );
    }
 
 
+   /**
+    * @param pairingsElement parent whose child elements each describe one dictionary - concept factory pairing
+    * @param pairingName     one of "dictionaryName" or "conceptFactoryName"
+    * @return map from each pairing's "name" text to the text of that pairing's {@code pairingName} child
+    * @throws AnnotatorContextException if a pairing has no "name" child text or the text is empty
+    */
+   static private Map<String, String> parsePairingNames( final Element pairingsElement, final String pairingName )
+         throws AnnotatorContextException {
+      final Map<String, String> pairConceptFactoryNames = new HashMap<>();
+      final Collection pairingElements = pairingsElement.getChildren();
+      for ( Object pairingElement : pairingElements ) {
+         if ( pairingElement instanceof Element ) { // children that are not Elements are skipped
+            final String pairName = getName( "Dictionary - Concept Factory Pairing", (Element)pairingElement );
+            final String conceptFactorName = ((Element)pairingElement).getChildText( pairingName ); // holds whichever name pairingName selects, despite the variable's name
+            pairConceptFactoryNames.put( pairName, conceptFactorName ); // NOTE(review): the value is stored unchecked - confirm callers handle a missing child
+         }
+      }
+      return pairConceptFactoryNames;
+   }
+
+   static private String getName( final String elementName, final Element element ) throws AnnotatorContextException { // returns the text of element's "name" child
+      final String name = element.getChildText( "name" );
+      if ( name == null || name.isEmpty() ) { // a null or empty name is rejected rather than returned
+         throw new AnnotatorContextException( "Missing name for " + elementName, new Object[0] ); // elementName only labels what kind of element lacked a name
+      }
+      return name;
+   }
+
+
+//
+//
+//
+//
+//   /**
+//    * Creates a dictionary by parsing each child element of {@link this.DICTIONARIES_KEY}
+//    *
+//    * @param uimaContext       -
+//    * @param dictionaryElement contains the definition of a single dictionary
+//    * @return a dictionary or null if there is a problem
+//    * @throws AnnotatorContextException if any of a dozen things goes wrong
+//    */
+//   private static RareWordDictionary parseDictionaryXml( final UimaContext uimaContext,
+//                                                         final Element dictionaryElement )
+//         throws AnnotatorContextException {
+//      final String externalResourceKey = dictionaryElement.getAttributeValue( EXTERNAL_RESOURCE );
+//      final Boolean keepCase = Boolean.valueOf( dictionaryElement.getAttributeValue( CASE_SENSITIVE ) );
+//      final String entityTypeId = dictionaryElement.getAttributeValue( TYPE_ID );
+//      Object externalResource;
+//      try {
+//         externalResource = uimaContext.getResourceObject( externalResourceKey );
+//      } catch ( ResourceAccessException raE ) {
+//         throw new AnnotatorContextException( "Could not access external resource " + externalResourceKey,
+//                                              new Object[0], raE );
+//      }
+//      if ( externalResource == null ) {
+//         throw new AnnotatorContextException( "Could not find external resource " + externalResourceKey,
+//                                              new Object[0] );
+//      }
+//      RareWordDictionary dictionary = null;
+//      final Element implementationElement = (Element) dictionaryElement.getChild( IMPLEMENTATION ).getChildren().get( 0 );
+//      final String implementationName = implementationElement.getName();
+//      if ( implementationName.equals( "rareWordJdbc" ) ) {
+//         dictionary = DictionaryFactory.createRareWordJdbc( implementationElement,
+//                                                            externalResource,
+//                                                            entityTypeId );
+//      } else if ( implementationName.equals( "rareWordUmls" ) ) {
+//         // TODO move umls info to the dictionary descriptor and parse parameter values here
+//         // final String externalResourceKey = dictionaryElement.getAttributeValue( EXTERNAL_RESOURCE );
+//         // TODO eventually move the umls dictionary download to a secure server with password protection
+//         try {
+//            // TODO attempt user etc. fetch from uimaContext.  If empty, attempt fetch from dictionaryElement
+//            UmlsUserApprover.validateUMLSUser( uimaContext );
+//            dictionary = DictionaryFactory.createRareWordJdbc( implementationElement,
+//                                                               externalResource,
+//                                                               entityTypeId );
+//         } catch ( ResourceInitializationException riE ) {
+//            throw new AnnotatorContextException( riE );
+//         }
+//      } else if ( implementationName.equals( "rareWordBsv" ) ) {
+//         dictionary = DictionaryFactory.createRareWordBsv( externalResourceKey, externalResource, entityTypeId );
+////      } else if ( implementationName.equals( "luceneImpl" ) ) {
+////         dictionary = DictionaryFactory.createWrappedLucene( dictionaryElement,
+////                                                                     externalResourceKey,
+////                                                                     externalResource,
+////                                                                     entityTypeId );
+////      } else if ( implementationName.equals( "jdbcImpl" ) ) {
+////         dictionary = DictionaryFactory.createWrappedJdbc( dictionaryElement,
+////                                                                   implementationElement,
+////                                                                   externalResourceKey,
+////                                                                   externalResource,
+////                                                                   entityTypeId );
+////      } else if ( implementationName.equals( "csvImp" ) ) {
+////         dictionary = DictionaryFactory.createWrappedCsv( dictionaryElement,
+////                                                                  implementationElement,
+////                                                                  externalResourceKey,
+////                                                                  externalResource,
+////                                                                  entityTypeId );
+//      } else {
+//         throw new AnnotatorContextException( "Unsupported dictionary implementation " + implementationName,
+//                                              new Object[0] );
+//      }
+//      if ( dictionary == null ) {
+//         throw new AnnotatorContextException( "No appropriate dictionary defined", new Object[0] );
+//      }
+//      // Deprecated -
+////      if ( dictionary instanceof Dictionary ) {
+////         final Collection metaFields = dictionaryElement.getChild( "metaFields" ).getChildren();
+////         for ( Object value : metaFields ) {
+////            String metaFieldName = ((Element) value).getAttributeValue( "fieldName" );
+////            ((Dictionary) dictionary).retainMetaData( metaFieldName );
+////         }
+////      }
+//      return dictionary;
+//   }
+
 
    /**
     * Creates a term consumer by parsing section defined by {@link this.CONSUMER_KEY}
@@ -282,12 +435,12 @@ final public class DictionaryDescriptorP
     * @throws AnnotatorContextException if any of a dozen things goes wrong
     */
    private static TermConsumer parseConsumerXml( final UimaContext uimaContext,
-                                                         final Element lookupConsumerElement ) throws
-                                                                                               AnnotatorContextException {
-      Class[] constrArgsConsum = {UimaContext.class, Properties.class, int.class};//ohnlp-Bugs-3296301
-      Class[] constrArgsConsumB = {UimaContext.class, Properties.class};
+                                                 final Element lookupConsumerElement ) throws
+         AnnotatorContextException {
+      Class[] constrArgsConsum = { UimaContext.class, Properties.class, int.class };//ohnlp-Bugs-3296301
+      Class[] constrArgsConsumB = { UimaContext.class, Properties.class };
 
-      String consumerClassName = lookupConsumerElement.getAttributeValue( "className" );
+      String consumerClassName = lookupConsumerElement.getChildText( "implementationName" );
       Element consumerPropertiesElement = lookupConsumerElement.getChild( "properties" );
       Properties consumerProperties = parsePropertiesXml( consumerPropertiesElement );
       Class consumerClass;
@@ -298,24 +451,21 @@ final public class DictionaryDescriptorP
       }
       if ( !TermConsumer.class.isAssignableFrom( consumerClass ) ) {
          throw new AnnotatorContextException( consumerClassName + " is not a TermConsumer",
-                                              new Object[0] );
+               new Object[0] );
       }
       final Constructor[] constructors = consumerClass.getConstructors();
       for ( Constructor constructor : constructors ) {
          try {
             if ( Arrays.equals( constrArgsConsum, constructor.getParameterTypes() ) ) {
-               final Object[] args = new Object[]{uimaContext, consumerProperties, MAX_LIST_SIZE}; //ohnlp-Bugs-3296301
-               return (TermConsumer) constructor.newInstance( args );
+               final Object[] args = new Object[]{ uimaContext, consumerProperties,
+                                                   MAX_LIST_SIZE }; //ohnlp-Bugs-3296301
+               return (TermConsumer)constructor.newInstance( args );
             } else if ( Arrays.equals( constrArgsConsumB, constructor.getParameterTypes() ) ) {
-               final Object[] args = new Object[]{uimaContext, consumerProperties};
-               return (TermConsumer) constructor.newInstance( args );
+               final Object[] args = new Object[]{ uimaContext, consumerProperties };
+               return (TermConsumer)constructor.newInstance( args );
             }
-         } catch ( InstantiationException inE ) {
-            throw new AnnotatorContextException( "Could not construct " + consumerClassName, new Object[0], inE );
-         } catch ( IllegalAccessException iaE ) {
-            throw new AnnotatorContextException( "Could not construct " + consumerClassName, new Object[0], iaE );
-         } catch ( InvocationTargetException itE ) {
-            throw new AnnotatorContextException( "Could not construct " + consumerClassName, new Object[0], itE );
+         } catch ( InstantiationException | IllegalAccessException | InvocationTargetException multE ) {
+            throw new AnnotatorContextException( "Could not construct " + consumerClassName, new Object[0], multE );
          }
       }
       throw new AnnotatorContextException( "No Constructor for " + consumerClassName, new Object[0] );
@@ -331,7 +481,7 @@ final public class DictionaryDescriptorP
       final Properties properties = new Properties();
       final Collection propertyElements = propertiesElement.getChildren();
       for ( Object value : propertyElements ) {
-         final Element propertyElement = (Element) value;
+         final Element propertyElement = (Element)value;
          final String key = propertyElement.getAttributeValue( "key" );
          final String propertyValue = propertyElement.getAttributeValue( "value" );
          properties.put( key, propertyValue );

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryFactory.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryFactory.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryFactory.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/DictionaryFactory.java Wed Sep 10 15:27:24 2014
@@ -33,14 +33,15 @@ import java.sql.Connection;
  * all methods have been commented out.  Uncommenting, linking, and rebuilding is possible if use of an older dictionary
  * resource is required.
  * TODO
- *
+ * <p/>
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 2/20/14
  */
 final public class DictionaryFactory {
 
-   private DictionaryFactory() {}
+   private DictionaryFactory() {
+   }
 
 
    /**
@@ -57,7 +58,7 @@ final public class DictionaryFactory {
          throws AnnotatorContextException {
       checkResourceType( JdbcConnectionResource.class, externalResource );
       final String tableName = implementationElement.getAttributeValue( "tableName" );
-      final Connection connection = ((JdbcConnectionResource) externalResource).getConnection();
+      final Connection connection = ((JdbcConnectionResource)externalResource).getConnection();
       return new JdbcRareWordDictionary( entityTypeId, connection, tableName );
    }
 
@@ -74,8 +75,8 @@ final public class DictionaryFactory {
                                                        final String entityTypeId )
          throws AnnotatorContextException {
       checkResourceType( FileResource.class, externalResource );
-      final File bsvFile = ((FileResource) externalResource).getFile();
-      return new BsvRareWordDictionary( externalResourceKey, entityTypeId, bsvFile );
+      final File bsvFile = ((FileResource)externalResource).getFile();
+      return new BsvRareWordDictionary( externalResourceKey, bsvFile );
    }
 
 //   /**
@@ -205,7 +206,7 @@ final public class DictionaryFactory {
          return;
       }
       throw new AnnotatorContextException( "Expected external resource to be " + expectedClassType.getName()
-                                                 + " not " + typeValue.getClass().getName(), new Object[0] );
+                                           + " not " + typeValue.getClass().getName(), new Object[0] );
    }
 
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/JdbcRareWordDictionary.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/JdbcRareWordDictionary.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/JdbcRareWordDictionary.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/JdbcRareWordDictionary.java Wed Sep 10 15:27:24 2014
@@ -19,15 +19,15 @@
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
 import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.CuiCodeUtil;
 import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
 
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
+import java.sql.*;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Properties;
 
 /**
  * Preferred dictionary to use for large collections of terms.
@@ -44,8 +44,9 @@ final public class JdbcRareWordDictionar
     * If a configurable implementation is desired then create an extension.
     */
    static private enum FIELD_INDEX {
-      CUI( 1 ), TUI( 2 ), RINDEX( 3 ), TCOUNT( 4 ), TEXT( 5 ), RWORD( 6 );
+      CUI( 1 ), RINDEX( 2 ), TCOUNT( 3 ), TEXT( 4 ), RWORD( 5 );
       final private int __index;
+
       private FIELD_INDEX( final int index ) {
          __index = index;
       }
@@ -54,22 +55,76 @@ final public class JdbcRareWordDictionar
    // LOG4J logger based on class name
    final private Logger _logger = Logger.getLogger( getClass().getName() );
 
+
+   // TODO move to Constants class
+   static private final String JDBC_DRIVER = "jdbcDriver";
+   static private final String JDBC_URL = "jdbcUrl";
+   static private final String JDBC_USER = "jdbcUser";
+   static private final String JDBC_PASS = "jdbcPass";
+   static private final String RARE_WORD_TABLE = "rareWordTable";
+
+
    final private Connection _connection;
-   final private String _tableName;
-   private PreparedStatement _metadataStatement;
+   private PreparedStatement _selectTermCall;
+
+
+   public JdbcRareWordDictionary( final String name, final UimaContext uimaContext, final Properties properties ) // builds the dictionary from the jdbcDriver, jdbcUrl, jdbcUser, jdbcPass and rareWordTable properties
+         throws ClassNotFoundException, InstantiationException, IllegalAccessException {
+      this( name, // uimaContext is not read here; every setting comes from properties
+            properties.getProperty( JDBC_DRIVER ), properties.getProperty( JDBC_URL ), // NOTE(review): a missing key is passed on unchecked - confirm the delegate handles it
+            properties.getProperty( JDBC_USER ), properties.getProperty( JDBC_PASS ),
+            properties.getProperty( RARE_WORD_TABLE ) );
+   }
+
+
+   public JdbcRareWordDictionary( final String name, // loads and registers the JDBC driver, opens a connection, then prepares the term lookup statement
+                                  final String jdbcDriver,
+                                  final String jdbcUrl,
+                                  final String jdbcUser,
+                                  final String jdbcPass,
+                                  final String tableName )
+         throws ClassNotFoundException, InstantiationException, IllegalAccessException {
+      super( name );
+      try {
+         final Driver driver = (Driver)Class.forName( jdbcDriver ).newInstance(); // the driver class is instantiated reflectively from its name
+         DriverManager.registerDriver( driver );
+      } catch ( SQLException sqlE ) {
+         _logger.error( "Could not register Driver " + jdbcDriver, sqlE );
+         throw new InstantiationException( "Could not register Driver " + jdbcDriver ); // the SQLException is logged above but not chained to this exception
+      } catch ( ClassNotFoundException | InstantiationException | IllegalAccessException multE ) {
+         _logger.error( "Could not create Driver " + jdbcDriver, multE );
+         throw multE; // logged, then rethrown unchanged to the caller
+      }
+      Connection connection = null;
+      try {
+         connection = DriverManager.getConnection( jdbcUrl, jdbcUser, jdbcPass );
+      } catch ( SQLException sqlE ) {
+         _logger.error( "Could not create Connection with " + jdbcUrl + " as " + jdbcUser, sqlE );
+         throw new InstantiationException( "Could not create Connection with " + jdbcUrl + " as " + jdbcUser ); // reported as InstantiationException; the SQLException is only logged
+      }
+      _connection = connection;
+      try {
+         _selectTermCall = createSelectCall( tableName );
+      } catch ( SQLException sqlE ) {
+         _logger.error( "Could not create Term Data Selection Call", sqlE ); // NOTE(review): construction still succeeds, so _selectTermCall appears to stay unset - confirm later lookups cope
+      }
+   }
+
 
    /**
-    *
-    * @param semanticGroup the type of term that exists in the dictionary: Anatomical Site, Disease/Disorder, Drug, etc.
     * @param connection database connection
-    * @param tableName name of the database table to use for lookup.  Used as the simple name for the dictionary
+    * @param tableName  name of the database table to use for lookup.  Used as the simple name for the dictionary
     */
-   public JdbcRareWordDictionary( final String semanticGroup,
+   public JdbcRareWordDictionary( final String name,
                                   final Connection connection,
                                   final String tableName ) {
-      super( tableName, semanticGroup );
+      super( name );
       _connection = connection;
-      _tableName = tableName;
+      try {
+         _selectTermCall = createSelectCall( tableName );
+      } catch ( SQLException sqlE ) {
+         _logger.error( "Could not create Term Data Selection Call", sqlE );
+      }
    }
 
    /**
@@ -79,15 +134,14 @@ final public class JdbcRareWordDictionar
    public Collection<RareWordTerm> getRareWordHits( final String rareWordText ) {
       final List<RareWordTerm> rareWordTerms = new ArrayList<RareWordTerm>();
       try {
-         initMetaDataStatement( rareWordText );
-         final ResultSet resultSet = _metadataStatement.executeQuery();
+         fillSelectCall( rareWordText );
+         final ResultSet resultSet = _selectTermCall.executeQuery();
          while ( resultSet.next() ) {
-            final RareWordTerm rareWordTerm = new RareWordTerm( resultSet.getString( FIELD_INDEX.TEXT.__index),
-                                                                resultSet.getString( FIELD_INDEX.CUI.__index ),
-                                                                resultSet.getString( FIELD_INDEX.TUI.__index ),
-                                                                resultSet.getString( FIELD_INDEX.RWORD.__index ),
-                                                                resultSet.getInt( FIELD_INDEX.RINDEX.__index ),
-                                                                resultSet.getInt( FIELD_INDEX.TCOUNT.__index ) );
+            final RareWordTerm rareWordTerm = new RareWordTerm( resultSet.getString( FIELD_INDEX.TEXT.__index ),
+                  resultSet.getLong( FIELD_INDEX.CUI.__index ),
+                  resultSet.getString( FIELD_INDEX.RWORD.__index ),
+                  resultSet.getInt( FIELD_INDEX.RINDEX.__index ),
+                  resultSet.getInt( FIELD_INDEX.TCOUNT.__index ) );
             rareWordTerms.add( rareWordTerm );
          }
          // Though the ResultSet interface documentation states that there are automatic closures,
@@ -100,19 +154,24 @@ final public class JdbcRareWordDictionar
    }
 
    /**
-    *
+    * @return an sql call to use for term lookup
+    * @throws SQLException if the {@code PreparedStatement} could not be created or changed
+    */
+   private PreparedStatement createSelectCall( final String tableName ) throws SQLException { // prepares the statement that fillSelectCall later binds a rare word into
+      final String lookupSql = "SELECT * FROM " + tableName + " WHERE RWORD = ?"; // only RWORD is a bind parameter; NOTE(review): tableName is concatenated, so it must come from trusted configuration
+      return _connection.prepareStatement( lookupSql );
+   }
+
+   /**
     * @param rareWordText text of the rare word to use for term lookup
     * @return an sql call to use for term lookup
     * @throws SQLException if the {@code PreparedStatement} could not be created or changed
     */
-   private PreparedStatement initMetaDataStatement( final String rareWordText ) throws SQLException {
-      if ( _metadataStatement == null ) {
-         final String lookupSql = "SELECT * FROM " + _tableName + " WHERE RWORD = ?";
-         _metadataStatement = _connection.prepareStatement( lookupSql );
-      }
-      _metadataStatement.clearParameters();
-      _metadataStatement.setString( 1, rareWordText );
-      return _metadataStatement;
+   private PreparedStatement fillSelectCall( final String rareWordText ) throws SQLException {
+      _selectTermCall.clearParameters();
+      _selectTermCall.setString( 1, rareWordText );
+      return _selectTermCall;
    }
 
+
 }

Modified: ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/MemRareWordDictionary.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/MemRareWordDictionary.java?rev=1624032&r1=1624031&r2=1624032&view=diff
==============================================================================
--- ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/MemRareWordDictionary.java (original)
+++ ctakes/trunk/ctakes-dictionary-lookup-fast/src/main/java/org/apache/ctakes/dictionary/lookup2/dictionary/MemRareWordDictionary.java Wed Sep 10 15:27:24 2014
@@ -19,14 +19,13 @@
 package org.apache.ctakes.dictionary.lookup2.dictionary;
 
 import org.apache.ctakes.dictionary.lookup2.term.RareWordTerm;
+import org.apache.ctakes.dictionary.lookup2.util.collection.CollectionMap;
 
 import java.util.Collection;
-import java.util.Collections;
-import java.util.Map;
 
 /**
  * A RareWordDictionary that uses a HashMap of Rare Words and Terms for lookup
- *
+ * <p/>
  * Author: SPF
  * Affiliation: CHIP-NLP
  * Date: 1/9/14
@@ -34,15 +33,16 @@ import java.util.Map;
 final public class MemRareWordDictionary extends AbstractRareWordDictionary {
 
    // Map of rare tokens to terms that contain those tokens.  Used like "First Word Token Lookup" but faster
-   final private Map<String,Collection<RareWordTerm>> _rareWordTermMap;
+   final private CollectionMap<String, RareWordTerm> _rareWordTermMap;
 
    /**
     * {@inheritDoc}
+    *
     * @param rareWordTermMap Map with a Rare Word (tokens) as key, and RareWordTerm Collection as value
     */
-   public MemRareWordDictionary( final String name, final String semanticGroup,
-                                 final Map<String, Collection<RareWordTerm>> rareWordTermMap ) {
-      super( name, semanticGroup );
+   public MemRareWordDictionary( final String name,
+                                 final CollectionMap<String, RareWordTerm> rareWordTermMap ) {
+      super( name );
       _rareWordTermMap = rareWordTermMap;
    }
 
@@ -51,11 +51,7 @@ final public class MemRareWordDictionary
     */
    @Override
    public Collection<RareWordTerm> getRareWordHits( final String rareWordText ) {
-      final Collection<RareWordTerm> hits = _rareWordTermMap.get( rareWordText );
-      if ( hits == null ) {
-         return Collections.emptyList();
-      }
-      return hits;
+      return _rareWordTermMap.getCollection( rareWordText );
    }
 
 }



Mime
View raw message