incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1446793 [2/3] - in /incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup: ./ ae/ algorithms/ filter/ jdbc/ lucene/ strtable/ vo/
Date Fri, 15 Feb 2013 22:26:54 GMT
Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java?rev=1446793&r1=1446792&r2=1446793&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java Fri Feb 15 22:26:53 2013
@@ -18,273 +18,236 @@
  */
 package org.apache.ctakes.dictionary.lookup.ae;
 
-import java.sql.SQLException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSArray;
-
-
 import org.apache.ctakes.dictionary.lookup.DictionaryException;
 import org.apache.ctakes.dictionary.lookup.MetaDataHit;
 import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
-import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+
+import java.sql.SQLException;
+import java.util.*;
 
 /**
- * Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity 
+ * Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity
  * objects only the ones that have a SNOMED synonym.
- * Override abstract method <code>getSnomedCodes</code> and implement 
- * looking up the CUI->SNOMED mappings 
- * 
+ * Override abstract method <code>getSnomedCodes</code> and implement
+ * looking up the CUI->SNOMED mappings
+ *
  * @author Mayo Clinic
  */
 public abstract class UmlsToSnomedConsumerImpl extends BaseLookupConsumerImpl implements
-		LookupConsumer
-{
+                                                                              LookupConsumer {
 
-	private final String CUI_MF_PRP_KEY = "cuiMetaField";
-	private final String TUI_MF_PRP_KEY = "tuiMetaField";
+   static private final String CUI_MF_PRP_KEY = "cuiMetaField";
+   static private final String TUI_MF_PRP_KEY = "tuiMetaField";
 
-	private final String CODING_SCHEME_PRP_KEY = "codingScheme";
+   static private final String CODING_SCHEME_PRP_KEY = "codingScheme";
 
-	private final String ANT_SITE_TUIS_PRP_KEY = "anatomicalSiteTuis";
-	private final String PROCEDURE_TUIS_PRP_KEY = "procedureTuis";
-	private final String DISORDER_TUIS_PRP_KEY = "disorderTuis";
-	private final String FINDING_TUIS_PRP_KEY = "findingTuis";
-
-	private Set antSiteTuiSet = new HashSet();
-	private Set procedureTuiSet = new HashSet();
-	private Set disorderTuiSet = new HashSet();
-	private Set findingTuiSet = new HashSet();
-	private Set validTuiSet = new HashSet();
-	
-	protected Properties props;
-
-	
-	public UmlsToSnomedConsumerImpl(UimaContext aCtx, Properties properties)
-			throws Exception
-	{
-		// TODO property validation could be done here
-		props = properties;
-
-		antSiteTuiSet = loadList(props.getProperty(ANT_SITE_TUIS_PRP_KEY));
-		procedureTuiSet = loadList(props.getProperty(PROCEDURE_TUIS_PRP_KEY));
-		disorderTuiSet = loadList(props.getProperty(DISORDER_TUIS_PRP_KEY));
-		findingTuiSet = loadList(props.getProperty(FINDING_TUIS_PRP_KEY));
-
-		validTuiSet.addAll(antSiteTuiSet);
-		validTuiSet.addAll(procedureTuiSet);
-		validTuiSet.addAll(disorderTuiSet);
-		validTuiSet.addAll(findingTuiSet);
-	}
-
-	
-	/**
-	 * Searches for the Snomed codes that are synonyms of the UMLS concept with CUI <code>umlsCode</code>
-	 * 
-	 * @param umlsCode
-	 * @return Set of SNOMED codes for the given UMLS CUI.
-	 * @throws SQLException, DictionaryException
-	 */
-	protected abstract Set getSnomedCodes(String umlsCode) throws SQLException, DictionaryException;
-	
-
-	public void consumeHits(JCas jcas, Iterator lhItr)
-			throws AnalysisEngineProcessException
-	{
-		try
-		{
-
-			Iterator hitsByOffsetItr = organizeByOffset(lhItr);
-			while (hitsByOffsetItr.hasNext())
-			{
-				Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
-
-				// Iterate over the LookupHit objects and group Snomed codes by NE type
-				// For each NE type for which there is a hit, get all the Snomed codes
-				// that map to the given CUI.
-
-				// Use key "cui,tui" to avoid duplicates at this offset
-				Set cuiTuiSet = new HashSet();
-
-				// key = type of named entity (java.lang.Integer)
-				// val = set of UmlsConcept objects (java.util.Set)
-				Map conceptMap = new HashMap();
-				
-				Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
-				int neBegin = -1;
-				int neEnd = -1;
-				while (lhAtOffsetItr.hasNext())
-				{
-					LookupHit lh = (LookupHit) lhAtOffsetItr.next();
-					neBegin = lh.getStartOffset();
-					neEnd = lh.getEndOffset();
-
-					MetaDataHit mdh = lh.getDictMetaDataHit();
-					String cui = mdh.getMetaFieldValue(props.getProperty(CUI_MF_PRP_KEY));
-					String tui = mdh.getMetaFieldValue(props.getProperty(TUI_MF_PRP_KEY));
-										
-					//String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
-					if (validTuiSet.contains(tui)) 
-					{
-						String cuiTuiKey = getUniqueKey(cui, tui);
-						if (!cuiTuiSet.contains(cuiTuiKey))
-						{
-							cuiTuiSet.add(cuiTuiKey);
-							Set snomedCodeSet = getSnomedCodes(cui);
-							if (snomedCodeSet.size() > 0)
-							{
-								Integer neType = new Integer(getNamedEntityType(tui));
-								Set conceptSet;
-								if (conceptMap.containsKey(neType)) {
-									conceptSet = (Set) conceptMap.get(neType);
-								}
-								else {
-									conceptSet = new HashSet();
-								}
-
-								Collection conceptCol = createConceptCol(
-										jcas,
-										cui,
-										tui,
-										snomedCodeSet);
-								conceptSet.addAll(conceptCol);
-
-								conceptMap.put(neType, conceptSet);
-							}
-						}
-					}
-				}
-
-				Iterator neTypeItr = conceptMap.keySet().iterator();
-				while (neTypeItr.hasNext())
-				{
-					Integer neType = (Integer) neTypeItr.next();
-					Set conceptSet = (Set) conceptMap.get(neType);
-
-					// Skip updating CAS if all Concepts for this type were filtered out
-					// for this span.
-					if (conceptSet.size() > 0) {
-						FSArray conceptArr = new FSArray(jcas, conceptSet.size());
-						int arrIdx = 0;
-						Iterator conceptItr = conceptSet.iterator();
-						while (conceptItr.hasNext())
-						{
-							UmlsConcept uc = (UmlsConcept) conceptItr.next();
-							conceptArr.set(arrIdx, uc);
-							arrIdx++;
-						}
-
-						IdentifiedAnnotation neAnnot;
-						if (neType.intValue() == CONST.NE_TYPE_ID_DRUG) {
-							neAnnot = new MedicationEventMention(jcas);	
-						} else {
-							neAnnot = new EntityMention(jcas);	
-						
-						}
-
-						neAnnot.setTypeID(neType.intValue());
-						neAnnot.setBegin(neBegin);
-						neAnnot.setEnd(neEnd);
-						neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP);
-						neAnnot.setOntologyConceptArr(conceptArr);
-						neAnnot.addToIndexes();
-					}
-					
-				}
-			}
-		}
-		catch (Exception e) {
-			throw new AnalysisEngineProcessException(e);
-		}
-	}
-
-	private int getNamedEntityType(String tui) throws Exception
-	{
-		if (disorderTuiSet.contains(tui)) {
-			return CONST.NE_TYPE_ID_DISORDER;
-		}
-		else if (findingTuiSet.contains(tui)) {
-			return CONST.NE_TYPE_ID_FINDING;
-		}
-		else if (antSiteTuiSet.contains(tui)) {
-			return CONST.NE_TYPE_ID_ANATOMICAL_SITE;
-		}
-		else if (procedureTuiSet.contains(tui)) {
-			return CONST.NE_TYPE_ID_PROCEDURE;
-		}
-		else {
-			throw new Exception("TUI is not part of valid named entity types: "
-					+ tui);
-		}
-	}
-
-	/**
-	 * For each SNOMED code, create a corresponding JCas UmlsConcept object and
-	 * store in a Collection.
-	 * 
-	 * @param jcas
-	 * @param snomedCodesCol
-	 * @return
-	 */
-	private Collection createConceptCol(JCas jcas, String cui, String tui,
-			Collection snomedCodesCol)
-	{
-		List conceptList = new ArrayList();
-		Iterator codeItr = snomedCodesCol.iterator();
-		while (codeItr.hasNext())
-		{
-			String snomedCode = (String) codeItr.next();
-			UmlsConcept uc = new UmlsConcept(jcas);
-			uc.setCode(snomedCode);
-			uc.setCodingScheme(props.getProperty(CODING_SCHEME_PRP_KEY));
-			uc.setCui(cui);
-			uc.setTui(tui);
-			conceptList.add(uc);
-		}
-		return conceptList;
-	}
-
-	private String getUniqueKey(String cui, String tui)
-	{
-		StringBuffer sb = new StringBuffer();
-		sb.append(cui);
-		sb.append(':');
-		sb.append(tui);
-		return sb.toString();
-	}
-
-	/**
-	 * Load a comma delimited list
-	 * @param delimitedString
-	 * @return
-	 */
-	private Set loadList(String delimitedString)
-	{
-		String[] stringArr = delimitedString.split(",");
-		Set stringSet = new HashSet();
-		for (int i = 0; i < stringArr.length; i++)
-		{
-			String s = stringArr[i].trim();
-			if (s.length() > 0)
-			{
-				stringSet.add(s);
-			}
-		}
-		return stringSet;
-	}
-}
\ No newline at end of file
+   static private final String MEDICATION_TUIS_PRP_KEY = "medicationTuis";
+   static private final String ANT_SITE_TUIS_PRP_KEY = "anatomicalSiteTuis";
+   static private final String PROCEDURE_TUIS_PRP_KEY = "procedureTuis";
+   static private final String DISORDER_TUIS_PRP_KEY = "disorderTuis";
+   static private final String FINDING_TUIS_PRP_KEY = "findingTuis";
+
+   private Set<String> _medicationSet = new HashSet<String>();
+   private Set<String> _antSiteTuiSet = new HashSet<String>();
+   private Set<String> _procedureTuiSet = new HashSet<String>();
+   private Set<String> _disorderTuiSet = new HashSet<String>();
+   private Set<String> _findingTuiSet = new HashSet<String>();
+   private Set<String> _validTuiSet = new HashSet<String>();
+
+   protected Properties props;
+
+
+   public UmlsToSnomedConsumerImpl( final UimaContext aCtx, final Properties properties ) throws Exception {
+      // TODO property validation could be done here
+      props = properties;
+
+      _medicationSet = loadList( props.getProperty( MEDICATION_TUIS_PRP_KEY ) ); // 1
+      _antSiteTuiSet = loadList( props.getProperty( ANT_SITE_TUIS_PRP_KEY ) );   // 6
+      _procedureTuiSet = loadList( props.getProperty( PROCEDURE_TUIS_PRP_KEY ) );// 5
+      _disorderTuiSet = loadList( props.getProperty( DISORDER_TUIS_PRP_KEY ) );  // 2
+      _findingTuiSet = loadList( props.getProperty( FINDING_TUIS_PRP_KEY ) );    // 3  aka sign/symptom
+
+      _validTuiSet.addAll( _medicationSet );
+      _validTuiSet.addAll( _antSiteTuiSet );
+      _validTuiSet.addAll( _procedureTuiSet );
+      _validTuiSet.addAll( _disorderTuiSet );
+      _validTuiSet.addAll( _findingTuiSet );
+   }
+
+
+   /**
+    * Searches for the Snomed codes that are synonyms of the UMLS concept with CUI <code>umlsCode</code>
+    *
+    * @param umlsCode                                   -
+    * @return Set of SNOMED codes for the given UMLS CUI.
+    * @throws SQLException, DictionaryException
+    */
+   protected abstract Set<String> getSnomedCodes( final String umlsCode ) throws SQLException, DictionaryException;
+
+
+   public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException {
+      try {
+         final String cuiPropKey = props.getProperty( CUI_MF_PRP_KEY );
+         final String tuiPropKey = props.getProperty( TUI_MF_PRP_KEY );
+         final Iterator hitsByOffsetItr = organizeByOffset( lhItr );
+         while ( hitsByOffsetItr.hasNext() ) {
+            final Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
+
+            // Iterate over the LookupHit objects and group Snomed codes by NE type
+            // For each NE type for which there is a hit, get all the Snomed codes
+            // that map to the given CUI.
+
+            // Use key "cui:tui" to avoid duplicates at this offset
+            final Set<String> cuiTuiSet = new HashSet<String>();
+
+            // key = type of named entity (java.lang.Integer)
+            // val = set of UmlsConcept objects (java.util.Set)
+            final Map<Integer,Set<UmlsConcept>> conceptMap = new HashMap<Integer,Set<UmlsConcept>>();
+
+            final Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
+            int neBegin = -1;
+            int neEnd = -1;
+            while ( lhAtOffsetItr.hasNext() ) {
+               final LookupHit lh = (LookupHit) lhAtOffsetItr.next();
+               neBegin = lh.getStartOffset();
+               neEnd = lh.getEndOffset();
+
+               final MetaDataHit mdh = lh.getDictMetaDataHit();
+               final String cui = mdh.getMetaFieldValue( cuiPropKey );
+               final String tui = mdh.getMetaFieldValue( tuiPropKey );
+
+               //String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
+               if ( !_validTuiSet.contains( tui ) ) {
+                  continue;
+               }
+               final String cuiTuiKey = getUniqueKey( cui, tui );
+               if ( cuiTuiSet.contains( cuiTuiKey ) ) {
+                  continue;
+               }
+               cuiTuiSet.add( cuiTuiKey );
+               final Set<String> snomedCodeSet = getSnomedCodes( cui );
+               if ( !snomedCodeSet.isEmpty() ) {
+                  final Integer neType = getNamedEntityType( tui );
+                  Set<UmlsConcept> conceptSet;
+                  if ( conceptMap.containsKey( neType ) ) {
+                     conceptSet = conceptMap.get( neType );
+                  } else {
+                     conceptSet = new HashSet<UmlsConcept>();
+                  }
+                  final Collection<UmlsConcept> conceptCol = createConceptCol( jcas, cui, tui, snomedCodeSet );
+                  conceptSet.addAll( conceptCol );
+                  conceptMap.put( neType, conceptSet );
+               }
+            }
+
+            final Collection<Integer> conceptKeys = conceptMap.keySet();
+            for ( Integer conceptKey : conceptKeys ) {
+               final Set<UmlsConcept> conceptSet = conceptMap.get( conceptKey );
+
+               // Skip updating CAS if all Concepts for this type were filtered out
+               // for this span.
+               if ( !conceptSet.isEmpty() ) {
+                  FSArray conceptArr = new FSArray( jcas, conceptSet.size() );
+                  int arrIdx = 0;
+                  for ( UmlsConcept umlsConcept : conceptSet ) {
+                     conceptArr.set( arrIdx, umlsConcept );
+                     arrIdx++;
+                  }
+
+                  IdentifiedAnnotation neAnnot;
+                  if ( conceptKey == CONST.NE_TYPE_ID_DRUG ) {
+                     neAnnot = new MedicationEventMention( jcas );
+                  } else {
+                     neAnnot = new EntityMention( jcas );
+                  }
+                  neAnnot.setTypeID( conceptKey );
+                  neAnnot.setBegin( neBegin );
+                  neAnnot.setEnd( neEnd );
+                  neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
+                  neAnnot.setOntologyConceptArr( conceptArr );
+                  neAnnot.addToIndexes();
+               }
+            }
+         }
+      } catch ( Exception e ) {
+         throw new AnalysisEngineProcessException( e );
+      }
+   }
+
+   private int getNamedEntityType( final String tui ) throws IllegalArgumentException {
+      if ( _medicationSet.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_DRUG;
+      } else if ( _disorderTuiSet.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_DISORDER;
+      } else if ( _findingTuiSet.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_FINDING;
+      } else if ( _antSiteTuiSet.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_ANATOMICAL_SITE;
+      } else if ( _procedureTuiSet.contains( tui ) ) {
+         return CONST.NE_TYPE_ID_PROCEDURE;
+      } else {
+         throw new IllegalArgumentException( "TUI is not part of valid named entity types: " + tui );
+      }
+   }
+
+   /**
+    * For each SNOMED code, create a corresponding JCas UmlsConcept object and
+    * store in a Collection.
+    *
+    * @param jcas -
+    * @param snomedCodesCol -
+    * @return -
+    */
+   private Collection<UmlsConcept> createConceptCol( final JCas jcas, final String cui, final String tui,
+                                        final Collection<String> snomedCodesCol ) {
+      final String codingSchemeKey = props.getProperty( CODING_SCHEME_PRP_KEY );
+      final List<UmlsConcept> conceptList = new ArrayList<UmlsConcept>();
+      for ( String snomedCode : snomedCodesCol ) {
+         final UmlsConcept uc = new UmlsConcept( jcas );
+         uc.setCode( snomedCode );
+         uc.setCodingScheme( codingSchemeKey );
+         uc.setCui( cui );
+         uc.setTui( tui );
+         conceptList.add( uc );
+      }
+      return conceptList;
+   }
+
+   private String getUniqueKey( final String cui, final String tui ) {
+      final StringBuilder sb = new StringBuilder();
+      sb.append( cui );
+      sb.append( ':' );
+      sb.append( tui );
+      return sb.toString();
+   }
+
+   /**
+    * Load a comma delimited list
+    *
+    * @param delimitedString -
+    * @return -
+    */
+   private Set<String> loadList( final String delimitedString ) {
+      if ( delimitedString == null || delimitedString.isEmpty() ) {
+         return Collections.emptySet();
+      }
+      final String[] stringArray = delimitedString.split( "," );
+      final Set<String> stringSet = new HashSet<String>();
+      for ( String text : stringArray ) {
+         final String trimText = text.trim();
+         if ( !trimText.isEmpty() ) {
+            stringSet.add( trimText );
+         }
+      }
+      return stringSet;
+   }
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedDbConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedDbConsumerImpl.java?rev=1446793&r1=1446792&r2=1446793&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedDbConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedDbConsumerImpl.java Fri Feb 15 22:26:53 2013
@@ -18,84 +18,76 @@
  */
 package org.apache.ctakes.dictionary.lookup.ae;
 
-import java.sql.Connection;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.HashSet;
-import java.util.Properties;
-import java.util.Set;
-
 import org.apache.ctakes.core.resource.JdbcConnectionResource;
-import org.apache.uima.UimaContext;
- 
+import org.apache.uima.UimaContext;
+
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.HashSet;
+import java.util.Properties;
+import java.util.Set;
+
 
 /**
- * Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity 
- * objects only the ones that have a SNOMED synonym, by looking in a database 
+ * Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity
+ * objects only the ones that have a SNOMED synonym, by looking in a database
  * for SNOMED codes that map to the identified CUI.
- * 
+ *
  * @author Mayo Clinic
  */
-public class UmlsToSnomedDbConsumerImpl extends UmlsToSnomedConsumerImpl implements
-		LookupConsumer
-{
-	
-	private final String DB_CONN_RESRC_KEY_PRP_KEY = "dbConnExtResrcKey";
-	private final String MAP_PREP_STMT_PRP_KEY = "mapPrepStmt";
-	//ohnlp-Bugs-3296301 fix limited search results to fixed 100 records.
-	// Added 'MaxListSize'
-	private static int iv_maxListSize;
-	private PreparedStatement mapPrepStmt;
-
-	public UmlsToSnomedDbConsumerImpl(UimaContext aCtx, Properties properties, int maxListSize)
-			throws Exception
-	{
-		super(aCtx, properties);
-		iv_maxListSize = maxListSize;
-		String resrcName = props.getProperty(DB_CONN_RESRC_KEY_PRP_KEY);
-		JdbcConnectionResource resrc = (JdbcConnectionResource) aCtx.getResourceObject(resrcName);
-
-		String prepStmtSql = props.getProperty(MAP_PREP_STMT_PRP_KEY);
-		Connection conn = resrc.getConnection();
-		mapPrepStmt = conn.prepareStatement(prepStmtSql);
-
-	}
-
-
-	public UmlsToSnomedDbConsumerImpl(UimaContext aCtx, Properties properties)
-			throws Exception
-	{
-		super(aCtx, properties);
-		String resrcName = props.getProperty(DB_CONN_RESRC_KEY_PRP_KEY);
-		JdbcConnectionResource resrc = (JdbcConnectionResource) aCtx.getResourceObject(resrcName);
-
-		String prepStmtSql = props.getProperty(MAP_PREP_STMT_PRP_KEY);
-		Connection conn = resrc.getConnection();
-		mapPrepStmt = conn.prepareStatement(prepStmtSql);
-
-	}
-
-	/**
-	 * Queries the given UMLS CUI against the DB. Returns a set of SNOMED codes.
-	 * 
-	 * @param umlsCode
-	 * @return
-	 * @throws SQLException
-	 */
-	protected Set getSnomedCodes(String umlsCode) throws SQLException
-	{
-		Set codeSet = new HashSet();
-		mapPrepStmt.setString(1, umlsCode);
-		ResultSet rs = mapPrepStmt.executeQuery();
-		while (rs.next())
-		{
-			String snomedCode = rs.getString(1).trim();
-			codeSet.add(snomedCode);
-		}
-		
-		return codeSet;
+public class UmlsToSnomedDbConsumerImpl extends UmlsToSnomedConsumerImpl implements LookupConsumer {
 
-	}
+   static private final String DB_CONN_RESRC_KEY_PRP_KEY = "dbConnExtResrcKey";
+   static private final String MAP_PREP_STMT_PRP_KEY = "mapPrepStmt";
+   //ohnlp-Bugs-3296301 fix limited search results to fixed 100 records.
+   // Added 'MaxListSize'
+   private static int _maxListSize;
+   final private PreparedStatement _preparedStatement;
+
+   public UmlsToSnomedDbConsumerImpl( final UimaContext uimaContext, final Properties properties, final int maxListSize )
+         throws Exception {
+      super( uimaContext, properties );
+      _maxListSize = maxListSize;
+      final String resourceName = props.getProperty( DB_CONN_RESRC_KEY_PRP_KEY );
+      final JdbcConnectionResource resrc = (JdbcConnectionResource) uimaContext.getResourceObject( resourceName );
+
+      final String sqlStatement = props.getProperty( MAP_PREP_STMT_PRP_KEY );
+      final Connection connection = resrc.getConnection();
+      _preparedStatement = connection.prepareStatement( sqlStatement );
+
+   }
+
+
+   public UmlsToSnomedDbConsumerImpl( final UimaContext uimaContext, final Properties properties )
+         throws Exception {
+      super( uimaContext, properties );
+      final String resourceName = props.getProperty( DB_CONN_RESRC_KEY_PRP_KEY );
+      final JdbcConnectionResource resource = (JdbcConnectionResource) uimaContext.getResourceObject( resourceName );
+
+      final String sqlStatement = props.getProperty( MAP_PREP_STMT_PRP_KEY );
+      final Connection connection = resource.getConnection();
+      _preparedStatement = connection.prepareStatement( sqlStatement );
+
+   }
+
+   /**
+    * Queries the given UMLS CUI against the DB. Returns a set of SNOMED codes.
+    *
+    * @param umlsCode -
+    * @return          -
+    * @throws SQLException
+    */
+   protected Set<String> getSnomedCodes( final String umlsCode ) throws SQLException {
+      final Set<String> codeSet = new HashSet<String>();
+      _preparedStatement.setString( 1, umlsCode );
+      final ResultSet rs = _preparedStatement.executeQuery();
+      while ( rs.next() ) {
+         final String snomedCode = rs.getString( 1 ).trim();
+         codeSet.add( snomedCode );
+      }
+      return codeSet;
+   }
 
-}
\ No newline at end of file
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedLuceneConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedLuceneConsumerImpl.java?rev=1446793&r1=1446792&r2=1446793&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedLuceneConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedLuceneConsumerImpl.java Fri Feb 15 22:26:53 2013
@@ -19,123 +19,114 @@
 package org.apache.ctakes.dictionary.lookup.ae;
 
 
-import java.io.File;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Properties;
-import java.util.Set;
-
 import org.apache.ctakes.core.resource.FileResource;
 import org.apache.ctakes.dictionary.lookup.DictionaryException;
 import org.apache.ctakes.dictionary.lookup.MetaDataHit;
 import org.apache.ctakes.dictionary.lookup.lucene.LuceneDictionaryImpl;
-import org.apache.log4j.Logger;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
-
+import org.apache.log4j.Logger;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Properties;
+import java.util.Set;
+
 
 /**
- * Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity 
- * objects only the ones that have a SNOMED synonym, by looking in a lucene index 
+ * Implementation that takes UMLS dictionary lookup hits and stores as NamedEntity
+ * objects only the ones that have a SNOMED synonym, by looking in a lucene index
  * for SNOMED codes that map to the identified CUI.
- * 
+ *
  * @author Mayo Clinic
  */
 public class UmlsToSnomedLuceneConsumerImpl extends UmlsToSnomedConsumerImpl implements
-		LookupConsumer
-{
+                                                                             LookupConsumer {
 
-	// LOG4J logger based on class name
-	private Logger logger = Logger.getLogger(getClass().getName());
+   // LOG4J logger based on class name
+   private Logger logger = Logger.getLogger( getClass().getName() );
 
-	//ohnlp-Bugs-3296301 limits the search results to fixed 100 records.
-	// Added 'MaxListSize'
-	private static int iv_maxListSize;
-	private final String SNOMED_MAPPING_PRP_KEY = "snomedCodeMappingField";
-	private final String CUI_MAPPING_PRP_KEY = "cuiMappingField";
-	private final String SNOMED_CODE_LIST_CONFIG_PARM = "CodesListIndexDirectory";
-	
-	private LuceneDictionaryImpl snomedLikeCodesIndex;
-
-	public UmlsToSnomedLuceneConsumerImpl(UimaContext aCtx, Properties properties)
-			throws Exception
-	{
-		this(aCtx,properties,Integer.MAX_VALUE);
-	}
-
-	// ohnlp Bugs tracker ID: 3390078 do not reload lucene index for each document, load in constructor
-	public UmlsToSnomedLuceneConsumerImpl(UimaContext aCtx, Properties properties, int maxListSize)
-			throws Exception
-	{
-		super(aCtx,properties);
-		iv_maxListSize = maxListSize;
-		
-		IndexReader indexReader;
-		String indexDirAbsPath = null;
-		try {
-			
-			// ohnlp Bugs tracker ID: 3425014 SNOMED lucene dictionary lookup hardcodes resource path 
-			FileResource fResrc = (FileResource) aCtx.getResourceObject(SNOMED_CODE_LIST_CONFIG_PARM);
-			if (fResrc == null) logger.error("Unable to find config parm " + SNOMED_CODE_LIST_CONFIG_PARM +  ".");
-			File indexDir = fResrc.getFile();
-			indexDirAbsPath = indexDir.getAbsolutePath();
-
-			try {
-				logger.info("Using lucene index: " + indexDir.getAbsolutePath());
-			}
-			catch (Exception e) {
-				throw new AnnotatorConfigurationException(e);
-			}
-
-			// For the sample dictionary, we use the following lucene index.
-			//indexPath = "lookup/snomed-like_codes_sample";
-			
-			indexReader = IndexReader.open(FSDirectory.open(indexDir)); 
-
-			IndexSearcher indexSearcher = new IndexSearcher(indexReader);
-			String lookupFieldName = props.getProperty(CUI_MAPPING_PRP_KEY);
-			
-			// We will lookup entries based on lookupFieldName
-			snomedLikeCodesIndex = new LuceneDictionaryImpl(indexSearcher, lookupFieldName, iv_maxListSize);
-			
-			logger.info("Loaded Lucene index with "+ indexReader.numDocs() +" entries.");
-	        
-		} catch (IOException ioe) {
-			
-		    logger.info("Lucene index: " + indexDirAbsPath);
-		    throw new DictionaryException(ioe);
-		    
-		}
-
-	}
-
-
-	/**
-	 * Find all Snomed codes that map to the given UMLS code (CUI),
-	 * by looking in a lucene index
-	 * 
-	 * @param umlsCode a UMLS CUI
-	 * @return Set of Snomed codes that map to the given UMLS code (CUI).
-	 * @see getSnomedCodes in <code>UmlsToSnomedConsumerImpl</code> for example of using a database
-	 */
-	protected Set getSnomedCodes(String umlsCode) throws DictionaryException
-	{
-		Set codeSet = new HashSet();
-		
-		String valueFieldName = props.getProperty(SNOMED_MAPPING_PRP_KEY);
-		// Get the entries with field lookupFieldName having value umlsCode
-		Collection<MetaDataHit> mdhCollection = snomedLikeCodesIndex.getEntries(umlsCode);
-	        
-		for (MetaDataHit mdh: mdhCollection) {
-		    codeSet.add(mdh.getMetaFieldValue(valueFieldName));
-		}
-		
-		return codeSet;
+   //ohnlp-Bugs-3296301 limits the search results to fixed 100 records.
+   // Added 'MaxListSize'
+   private static int iv_maxListSize;
+   private final String SNOMED_MAPPING_PRP_KEY = "snomedCodeMappingField";
+   private final String CUI_MAPPING_PRP_KEY = "cuiMappingField";
+   private final String SNOMED_CODE_LIST_CONFIG_PARM = "CodesListIndexDirectory";
+
+   private LuceneDictionaryImpl snomedLikeCodesIndex;
+
+   public UmlsToSnomedLuceneConsumerImpl( UimaContext aCtx, Properties properties )
+         throws Exception {
+      this( aCtx, properties, Integer.MAX_VALUE );
+   }
+
+   // ohnlp Bugs tracker ID: 3390078 do not reload lucene index for each document, load in constructor
+   public UmlsToSnomedLuceneConsumerImpl( UimaContext aCtx, Properties properties, int maxListSize )
+         throws Exception {
+      super( aCtx, properties );
+      iv_maxListSize = maxListSize;
+
+      IndexReader indexReader;
+      String indexDirAbsPath = null;
+      try {
+
+         // ohnlp Bugs tracker ID: 3425014 SNOMED lucene dictionary lookup hardcodes resource path
+         FileResource fResrc = (FileResource) aCtx.getResourceObject( SNOMED_CODE_LIST_CONFIG_PARM );
+         if ( fResrc == null ) {
+            logger.error( "Unable to find config parm " + SNOMED_CODE_LIST_CONFIG_PARM + "." );
+         }
+         File indexDir = fResrc.getFile();
+         indexDirAbsPath = indexDir.getAbsolutePath();
+
+         try {
+            logger.info( "Using lucene index: " + indexDir.getAbsolutePath() );
+         } catch ( Exception e ) {
+            throw new AnnotatorConfigurationException( e );
+         }
+
+         // For the sample dictionary, we use the following lucene index.
+         //indexPath = "lookup/snomed-like_codes_sample";
+
+         indexReader = IndexReader.open( FSDirectory.open( indexDir ) );
+
+         IndexSearcher indexSearcher = new IndexSearcher( indexReader );
+         String lookupFieldName = props.getProperty( CUI_MAPPING_PRP_KEY );
+
+         // We will lookup entries based on lookupFieldName
+         snomedLikeCodesIndex = new LuceneDictionaryImpl( indexSearcher, lookupFieldName, iv_maxListSize );
+
+         logger.info( "Loaded Lucene index with " + indexReader.numDocs() + " entries." );
+
+      } catch ( IOException ioe ) {
+         logger.info( "Lucene index: " + indexDirAbsPath );
+         throw new DictionaryException( ioe );
+      }
+   }
+
+
+   /**
+    * Find all Snomed codes that map to the given UMLS code (CUI),
+    * by looking in a lucene index
+    *
+    * @param umlsCode a UMLS CUI
+    * @return Set of Snomed codes that map to the given UMLS code (CUI).
+    * @see getSnomedCodes in <code>UmlsToSnomedDbConsumerImpl</code> for example of using a database
+    */
+   protected Set<String> getSnomedCodes( final String umlsCode ) throws DictionaryException {
+      final Set<String> codeSet = new HashSet<String>();
+      final String valueFieldName = props.getProperty( SNOMED_MAPPING_PRP_KEY );
+      // Get the entries with field lookupFieldName having value umlsCode
+      final Collection<MetaDataHit> mdhCollection = snomedLikeCodesIndex.getEntries( umlsCode );
+      for ( MetaDataHit mdh : mdhCollection ) {
+         codeSet.add( mdh.getMetaFieldValue( valueFieldName ) );
+      }
+      return codeSet;
 
-	}
+   }
 
-}
\ No newline at end of file
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/DirectPassThroughImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/DirectPassThroughImpl.java?rev=1446793&r1=1446792&r2=1446793&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/DirectPassThroughImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/DirectPassThroughImpl.java Fri Feb 15 22:26:53 2013
@@ -27,6 +27,7 @@ import java.util.Map;
 import org.apache.ctakes.dictionary.lookup.DictionaryEngine;
 import org.apache.ctakes.dictionary.lookup.MetaDataHit;
 import org.apache.ctakes.dictionary.lookup.phrasebuilder.PhraseBuilder;
+import org.apache.ctakes.dictionary.lookup.vo.LookupAnnotation;
 import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
 import org.apache.ctakes.dictionary.lookup.vo.LookupToken;
 
@@ -53,12 +54,17 @@ public class DirectPassThroughImpl imple
         iv_phrBuilder = phraseBuilder;
     }
 
-    public Collection lookup(List ltList, Map ctxMap) throws Exception
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+    public Collection<LookupHit> lookup(final List<LookupToken> lookupTokenList,
+                                        final Map<String,List<LookupAnnotation>> contextMap) throws Exception
     {
         List lhList = new ArrayList();
-        for (int tokenIdx = 0; tokenIdx < ltList.size(); tokenIdx++)
+        for (int tokenIdx = 0; tokenIdx < lookupTokenList.size(); tokenIdx++)
         {
-            LookupToken lt = (LookupToken) ltList.get(tokenIdx);
+            LookupToken lt = (LookupToken) lookupTokenList.get(tokenIdx);
 
             List singleLtList = new ArrayList();
             singleLtList.add(lt);
@@ -95,4 +101,4 @@ public class DirectPassThroughImpl imple
         return mdhCol;
     }
 
-}
\ No newline at end of file
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/FirstTokenPermutationImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/FirstTokenPermutationImpl.java?rev=1446793&r1=1446792&r2=1446793&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/FirstTokenPermutationImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/FirstTokenPermutationImpl.java Fri Feb 15 22:26:53 2013
@@ -18,16 +18,6 @@
  */
 package org.apache.ctakes.dictionary.lookup.algorithms;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
 import org.apache.ctakes.dictionary.lookup.DictionaryEngine;
 import org.apache.ctakes.dictionary.lookup.MetaDataHit;
 import org.apache.ctakes.dictionary.lookup.phrasebuilder.PhraseBuilder;
@@ -36,670 +26,529 @@ import org.apache.ctakes.dictionary.look
 import org.apache.ctakes.dictionary.lookup.vo.LookupToken;
 import org.apache.log4j.Logger;
 
+import java.util.*;
+
 
 /**
  * <b>OVERVIEW: </b> Each LookupToken is fed into a "first token" Dictionary. A
  * hit indicates an anchor and the window around this anchor is based on
  * context. This hit also contains all the presentations from the Dictionary
  * where the "first token" is contained.
- * 
+ * <p>
  * The window is determined by finding the largest overlapping context window
  * annotation. Permutations of LookupTokens found within this window are used to
  * match against the presentations found earlier. If context window annotations
  * are not provided, a fixed window is used based on the specified max
  * permutation level.
- * 
+ * <p>
  * <b>OPTIONAL CONTEXT: </b> context window annotations
- * 
+ *
  * @author Mayo Clinic
  */
-public class FirstTokenPermutationImpl implements LookupAlgorithm
-{
-    // LOG4J logger based on class name
-    private Logger iv_logger = Logger.getLogger(getClass().getName());
-
-    /**
-     * Key value for context map. Value is expected to be a List of
-     * LookupAnnotation objects in sorted order.
-     */
-    public static final String CTX_KEY_WINDOW_ANNOTATIONS = "WINDOW_ANNOTATIONS";
-
-    /**
-     * Key value for LookupToken attribute. Value is expected to be either TRUE
-     * or FALSE. This indicates whether to use this token for a "first token"
-     * lookup or not. This is optional.
-     */
-    public static final String LT_KEY_USE_FOR_LOOKUP = "USE_FOR_LOOKUP";
-
-    private DictionaryEngine iv_firstTokenDictEngine;
-    private PhraseBuilder iv_phrBuilder;
-
-    private int iv_maxPermutationLevel;
-    // key = level Integer, value = Permutation list
-    private Map iv_permCacheMap = new HashMap();
-
-    private String[] iv_textMetaFieldNames;
-
-    /**
-     * Constructor
-     * 
-     * @param firstTokenDictEngine
-     *            Dictionary that is indexed against first tokens.
-     * @param phraseBuilder
-     *            Builds phrases to match against Dictionary.
-     * @param textMetaFieldNames
-     *            MetaFieldNames used to extract presentations.
-     * @param maxPermutationLevel
-     *            Max permutation Level allowed.
-     */
-    public FirstTokenPermutationImpl(DictionaryEngine firstTokenDictEngine,
-            PhraseBuilder phraseBuilder, String textMetaFieldNames[],
-            int maxPermutationLevel)
-    {
-        iv_firstTokenDictEngine = firstTokenDictEngine;
-        iv_phrBuilder = phraseBuilder;
-        iv_textMetaFieldNames = textMetaFieldNames;
-
-        iv_maxPermutationLevel = maxPermutationLevel;
-        for (int i = 0; i <= maxPermutationLevel; i++)
-        {
-            Integer level = new Integer(i);
-            List permList = PermutationUtil.getPermutationList(i);
-            iv_permCacheMap.put(level, permList);
-        }
-    }
-
-    /**
-     * Implementation of algorithm.
-     */
-    public Collection lookup(List ltList, Map ctxMap) throws Exception
-    {
-        // setup optional window context data
-        boolean useWindowAnnots = false;
-        List wAnnotList = getWindowAnnotations(ctxMap);
-        if (wAnnotList.size() > 0)
-        {
-            useWindowAnnots = true;
-        }
-        Map wStartOffsetMap = getStartOffsetMap(wAnnotList, true);
-        Map wEndOffsetMap = getEndOffsetMap(wAnnotList, true);
-
-        Map ltListIndexMap = getListIndexMap(ltList);
-        Map ltStartOffsetMap = getStartOffsetMap(ltList, true);
-        Map ltEndOffsetMap = getEndOffsetMap(ltList, true);
-
-        List lhList = new ArrayList();
-        for (int ltIdx = 0; ltIdx < ltList.size(); ltIdx++)
-        {
-            LookupToken lt = (LookupToken) ltList.get(ltIdx);
-
-            Boolean useForLookup = Boolean.valueOf(lt.getStringAttribute(LT_KEY_USE_FOR_LOOKUP));
-
-            if ((useForLookup == null) || (useForLookup.booleanValue()))
-            {
-                Collection mdhCol = getFirstTokenHits(lt);
-
-                if ((mdhCol != null) && (mdhCol.size() > 0))
-                {
-                    int wEndOffset = -1;
-                    if (useWindowAnnots)
-                    {
-                        // get the largest overlapping window annotation
-                        LookupAnnotation wAnnot = getLargestWindowAnnotation(
-                                ltIdx,
-                                lt,
-                                ltStartOffsetMap,
-                                ltEndOffsetMap,
-                                ltListIndexMap,
-                                wStartOffsetMap,
-                                wEndOffsetMap);
-                        if (wAnnot != null)
-                        {
-                            wEndOffset = wAnnot.getEndOffset();
-                        }
-                    }
-                    if (wEndOffset == -1)
-                    {
-                        iv_logger.debug("Window size set to max perm level.");
-                        wEndOffset = getFixedWindowEndOffset(ltIdx, lt, ltList);
-                    }
-
-                    List endLookupTokenList = getLookupTokenList(
-                            wEndOffset,
-                            ltEndOffsetMap,
-                            false);
-                    LookupToken endLookupToken = (LookupToken) endLookupTokenList.get(endLookupTokenList.size() - 1);
-
-                    int startTokenIdx = ltIdx;
-                    int endTokenIdx = ((Integer) ltListIndexMap.get(endLookupToken)).intValue();
-
-                    // list of LookupToken objects bound by the window
-                    List wLookupTokenList = ltList.subList(
-                            startTokenIdx,
-                            endTokenIdx + 1);
-
-                    // use permutation algorithm to find any hits inside the window
-                    Collection lhCol = getLookupHits(
-                            mdhCol,
-                            wLookupTokenList,
-                            new Integer(ltIdx - startTokenIdx));
-
-                    lhList.addAll(lhCol);
-                }
-            }
-        }
-
-        return lhList;
-    }
-
-    private Collection getLookupHits(
-            Collection mdhCol,
-            List wLookupTokenList,
-            Integer firstTokenIndex) throws Exception
-    {
-        if ((wLookupTokenList.size() - 1) > iv_maxPermutationLevel)
-        {
-            iv_logger.debug("Beyond permutation cache size.");
-            return new ArrayList();
-        }
-
-        // build a list of index values (excludes index of first token)
-        List idxList = new ArrayList();
-        for (int i = 0; i < wLookupTokenList.size(); i++)
-        {
-            if (i != firstTokenIndex.intValue())
-            {
-                idxList.add(new Integer(i));
-            }
-        }
-
-        Collection permCol = (Collection) iv_permCacheMap.get(new Integer(
-                idxList.size()));
-
-        List lhList = new ArrayList();
-
-        Map mdhMap = new HashMap();
-        Iterator mdhItr = mdhCol.iterator();
-        while (mdhItr.hasNext())
-        {
-            MetaDataHit mdh = (MetaDataHit) mdhItr.next();
-            for (int i = 0; i < iv_textMetaFieldNames.length; i++)
-            {
-                String text = mdh.getMetaFieldValue(iv_textMetaFieldNames[i]);
-                if (text != null)
-                {
-                    text = text.toLowerCase();
-                    Set mdhSet = (Set) mdhMap.get(text);
-                    if (mdhSet == null)
-                    {
-                        mdhSet = new HashSet();
-                    }
-                    mdhSet.add(mdh);
-                    mdhMap.put(text, mdhSet);
-                }
-                else
-                {
-                    if (iv_logger.isDebugEnabled())
-                    {
-                        iv_logger.debug("MetaField "
-                                + iv_textMetaFieldNames[i]
-                                + " contains no data.");
-                    }
-                }
-            }
-        }
-
-        LookupToken firstWordLookupToken = (LookupToken) wLookupTokenList.get(firstTokenIndex.intValue());
-
-        Iterator permItr = permCol.iterator();
-        while (permItr.hasNext())
-        {
-            // convert permutation idx back into LookupTokens
-            List tempList = new ArrayList();
-            List permutation = (List) permItr.next();
-            Iterator idxItr = permutation.iterator();
-            while (idxItr.hasNext())
-            {
-                int idx = ((Integer) idxItr.next()).intValue();
-                if (idx <= firstTokenIndex.intValue())
-                {
-                    idx--;
-                }
-                LookupToken lt = (LookupToken) wLookupTokenList.get(idx);
-                tempList.add(lt);
-            }
-
-            List singleTokenList = new ArrayList();
-            singleTokenList.add(firstWordLookupToken);
-            String[] fwPerms = iv_phrBuilder.getPhrases(singleTokenList);
-
-            String[] phrArr = iv_phrBuilder.getPhrases(tempList);
-            for (int i = 0; i < phrArr.length; i++)
-            {
-                for (int fwPermIdx = 0; fwPermIdx < fwPerms.length; fwPermIdx++)
-                {
-                    StringBuffer phraseSB = new StringBuffer();
-                    phraseSB.append(fwPerms[fwPermIdx]);
-                    phraseSB.append(' ');
-                    phraseSB.append(phrArr[i]);
-                    String phrase = phraseSB.toString().trim().toLowerCase();
-                    Set mdhSet = (Set) mdhMap.get(phrase);
-                    if (mdhSet != null)
-                    {
-                        Iterator mdhIterator = mdhSet.iterator();
-                        while (mdhIterator.hasNext())
-                        {
-                            MetaDataHit mdh = (MetaDataHit) mdhIterator.next();
-                            // figure out start and end offsets
-                            Collections.sort(permutation);
-
-                            int startOffset;
-                            if (permutation.size() > 0)
-                            {
-                                int firstIdx = ((Integer) permutation.get(0)).intValue();
-                                if (firstIdx <= firstTokenIndex.intValue())
-                                {
-                                    firstIdx--;
-                                }
-                                LookupToken lt = (LookupToken) wLookupTokenList.get(firstIdx);
-                                if (lt.getStartOffset() < firstWordLookupToken.getStartOffset())
-                                {
-                                    startOffset = lt.getStartOffset();
-                                }
-                                else
-                                {
-                                    startOffset = firstWordLookupToken.getStartOffset();
-                                }
-                            }
-                            else
-                            {
-                                startOffset = firstWordLookupToken.getStartOffset();
-                            }
-
-                            int endOffset;
-                            if (permutation.size() > 0)
-                            {
-                                int lastIdx = ((Integer) permutation.get(permutation.size() - 1)).intValue();
-                                if (lastIdx <= firstTokenIndex.intValue())
-                                {
-                                    lastIdx--;
-                                }
-                                LookupToken lt = (LookupToken) wLookupTokenList.get(lastIdx);
-                                if (lt.getEndOffset() > firstWordLookupToken.getEndOffset())
-                                {
-                                    endOffset = lt.getEndOffset();
-                                }
-                                else
-                                {
-                                    endOffset = firstWordLookupToken.getEndOffset();
-                                }
-                            }
-                            else
-                            {
-                                endOffset = firstWordLookupToken.getEndOffset();
-                            }
-
-                            LookupHit lh = new LookupHit(
-                                    mdh,
-                                    startOffset,
-                                    endOffset);
-
-                            lhList.add(lh);
-                        }
-                    }
-                }
-            }
-        }
-        return lhList;
-    }
-
-    /**
-     * Extracts the list of LookupAnnotation objects representing noun phrases
-     * from the context map.
-     * 
-     * @param contextMap
-     * @return
-     */
-    private List getWindowAnnotations(Map contextMap)
-    {
-        List list = (List) contextMap.get(CTX_KEY_WINDOW_ANNOTATIONS);
-        if ((list == null) || (list.size() == 0))
-        {
-            iv_logger.debug("No context window annotations.");
-            return new ArrayList();
-        }
-        return list;
-    }
-
-    /**
-     * Determines the number of ListTokens are contained within the specified
-     * start and end offsets;
-     * 
-     * @param ltStartOffsetMap
-     * @param ltEndOffsetMap
-     * @param ltListIndexMap
-     * @param startOffset
-     * @param endOffset
-     * @return
-     */
-    private int getNumberOfListTokens(
-            Map ltStartOffsetMap,
-            Map ltEndOffsetMap,
-            Map ltListIndexMap,
-            int startOffset,
-            int endOffset)
-    {
-        List startLookupTokenList = getLookupTokenList(
-                startOffset,
-                ltStartOffsetMap,
-                true);
-        List endLookupTokenList = getLookupTokenList(
-                endOffset,
-                ltEndOffsetMap,
-                false);
-
-        if ((startLookupTokenList == null) || (endLookupTokenList == null))
-        {
-            iv_logger.debug("Invalid window:" + startOffset + "," + endOffset);
-            return -1;
-        }
-        LookupToken startLookupToken = (LookupToken) startLookupTokenList.get(0);
-        Integer startIdx = (Integer) ltListIndexMap.get(startLookupToken);
-
-        LookupToken endLookupToken = (LookupToken) endLookupTokenList.get(endLookupTokenList.size() - 1);
-        Integer endIdx = (Integer) ltListIndexMap.get(endLookupToken);
-
-        return endIdx.intValue() - startIdx.intValue() + 1;
-    }
-
-    /**
-     * Attempts to get a list of LookupToken objects at the specified offset. If
-     * there are none, this method attempts to try nearby offsets based on the
-     * traversal direction.
-     * 
-     * @param offset
-     * @param ltOffsetMap
-     * @param traverseRight
-     * @return
-     */
-    private List getLookupTokenList(
-            int offset,
-            Map ltOffsetMap,
-            boolean traverseRight)
-    {
-        // first attempt the original offset, which will be the case most of the
-        // time
-        List lookupTokenList = (List) ltOffsetMap.get(new Integer(offset));
-        if (lookupTokenList != null)
-        {
-            return lookupTokenList;
-        }
-        else
-        {
-            // otherwise traverse some nearby offsets and attempt to find a
-            // token
-
-            // TODO hardcoded max offset window is 10 char
-            final int offsetWindow = 10;
-
-            // build list of offsets to try
-            List offsetList = new ArrayList();
-            if (traverseRight)
-            {
-                int max = offset + offsetWindow;
-                for (int i = offset; i <= max; i++)
-                {
-                    offsetList.add(new Integer(i));
-                }
-            }
-            else
-            {
-                int min = offset - offsetWindow;
-                for (int i = offset; i >= min; i--)
-                {
-                    offsetList.add(new Integer(i));
-                }
-            }
-
-            Iterator offsetItr = offsetList.iterator();
-            while (offsetItr.hasNext())
-            {
-                Integer tempOffset = (Integer) offsetItr.next();
-                lookupTokenList = (List) ltOffsetMap.get(tempOffset);
-                if (lookupTokenList != null)
-                {
-                    return lookupTokenList;
-                }
-            }
-        }
-        // no tokens in window
-        return null;
-    }
-
-    /**
-     * Determines the largest overlapping window annotation for the specified
-     * LookupToken.
-     * 
-     * @param lt
-     * @param wStartOffsetMap
-     * @param wEndOffsetMap
-     * @return
-     */
-    private LookupAnnotation getLargestWindowAnnotation(
-            int tokenIdx,
-            LookupToken lt,
-            Map ltStartOffsetMap,
-            Map ltEndOffsetMap,
-            Map ltListIndexMap,
-            Map wStartOffsetMap,
-            Map wEndOffsetMap)
-    {
-        Set startCandidateSet = new HashSet();
-        Set endCandidateSet = new HashSet();
-
-        Iterator startItr = wStartOffsetMap.keySet().iterator();
-        while (startItr.hasNext())
-        {
-            Integer startOffset = (Integer) startItr.next();
-            if (startOffset.intValue() <= lt.getStartOffset())
-            {
-                List wAnnotList = (List) wStartOffsetMap.get(startOffset);
-                startCandidateSet.addAll(wAnnotList);
-            }
-        }
-
-        Iterator endItr = wEndOffsetMap.keySet().iterator();
-        while (endItr.hasNext())
-        {
-            Integer endOffset = (Integer) endItr.next();
-            if (endOffset.intValue() >= lt.getEndOffset())
-            {
-                List wAnnotList = (List) wEndOffsetMap.get(endOffset);
-                endCandidateSet.addAll(wAnnotList);
-            }
-        }
-
-        // union to get window annotations that are overlapping with LookupToken
-        startCandidateSet.retainAll(endCandidateSet);
-
-        // find largest overlapping window annotation
-        LookupAnnotation largestWindowAnnot = null;
-        Iterator laItr = startCandidateSet.iterator();
-        while (laItr.hasNext())
-        {
-            LookupAnnotation tempLookupAnnot = (LookupAnnotation) laItr.next();
-            if ((largestWindowAnnot == null)
-                    || (tempLookupAnnot.getLength() > largestWindowAnnot.getLength()))
-            {
-                // now see if we can handle the size of this window (permutation
-                // wise)
-                int ltCount = getNumberOfListTokens(
-                        ltStartOffsetMap,
-                        ltEndOffsetMap,
-                        ltListIndexMap,
-                        tempLookupAnnot.getStartOffset(),
-                        tempLookupAnnot.getEndOffset());
-
-                if ((ltCount <= iv_maxPermutationLevel) && (ltCount > 0))
-                {
-                    largestWindowAnnot = tempLookupAnnot;
-                }
-                else
-                {
-                    if (iv_logger.isDebugEnabled())
-                    {
-                        iv_logger.debug("Window size of "
-                                + ltCount
-                                + " exceeds the max permutation level of "
-                                + iv_maxPermutationLevel
-                                + ".");
-                    }
-                }
-            }
-        }
-
-        return largestWindowAnnot;
-    }
-
-    private int getFixedWindowEndOffset(
-            int tokenIdx,
-            LookupToken lt,
-            List ltList)
-    {
-        int fixedEndOffset = 0;
-
-        for (int i = tokenIdx; (i < tokenIdx + iv_maxPermutationLevel)
-                && (i < ltList.size()); i++)
-        {
-            LookupToken tempLookupToken = (LookupToken) ltList.get(i);
-            if (tempLookupToken != null)
-            {
-                fixedEndOffset = tempLookupToken.getEndOffset();
-            }
-        }
-        return fixedEndOffset;
-    }
-
-    /**
-     * Creates a map that binds an object from a list to its index position.
-     * 
-     * @param list
-     * @return
-     */
-    private Map getListIndexMap(List list)
-    {
-        Map m = new HashMap();
-
-        for (int i = 0; i < list.size(); i++)
-        {
-            Integer index = new Integer(i);
-            m.put(list.get(i), index);
-        }
-
-        return m;
-    }
-
-    /**
-     * Creates a map that uses the start offset to index the LookupAnnotation
-     * objects. If multiple LookupAnnotations can exist at the same start
-     * offset, then hasMultiples=true and the values with be a List of
-     * LookupAnnotation objects at that offset.
-     * 
-     * @param lookupAnnotList
-     * @param hasMultiples
-     * @return
-     */
-    private Map getStartOffsetMap(List lookupAnnotList, boolean hasMultiples)
-    {
-        Map m = new HashMap();
-
-        Iterator laItr = lookupAnnotList.iterator();
-        while (laItr.hasNext())
-        {
-            LookupAnnotation la = (LookupAnnotation) laItr.next();
-            Integer key = new Integer(la.getStartOffset());
-            if (hasMultiples)
-            {
-                List list = (List) m.get(key);
-                if (list == null)
-                {
-                    list = new ArrayList();
-                }
-                list.add(la);
-                m.put(key, list);
-            }
-            else
-            {
-                m.put(key, la);
-            }
-        }
-
-        return m;
-    }
-
-    /**
-     * Creates a map that uses the end offset to index the LookupAnnotation
-     * objects. If multiple LookupAnnotations can exist at the end start offset,
-     * then hasMultiples=true and the values with be a List of LookupAnnotation
-     * objects at that offset.
-     * 
-     * @param lookupAnnotList
-     * @param hasMultiples
-     * @return
-     */
-    private Map getEndOffsetMap(List lookupAnnotList, boolean hasMultiples)
-    {
-        Map m = new HashMap();
-
-        Iterator laItr = lookupAnnotList.iterator();
-        while (laItr.hasNext())
-        {
-            LookupAnnotation la = (LookupAnnotation) laItr.next();
-            Integer key = new Integer(la.getEndOffset());
-            if (hasMultiples)
-            {
-                List list = (List) m.get(key);
-                if (list == null)
-                {
-                    list = new ArrayList();
-                }
-                list.add(la);
-                m.put(key, list);
-            }
-            else
-            {
-                m.put(key, la);
-            }
-        }
-
-        return m;
-    }
-
-    /**
-     * Gets the hits for the specified LookupToken. This uses the first token Dictionary.
-     * 
-     * @param firstLookupToken
-     * @return
-     * @throws Exception
-     */
-    private Collection getFirstTokenHits(LookupToken firstLookupToken)
-            throws Exception
-    {
-        List singleLtList = new ArrayList();
-        singleLtList.add(firstLookupToken);
-
-        String[] phrases = iv_phrBuilder.getPhrases(singleLtList);
-
-        Collection mdhCol = new ArrayList();
-        for (int i = 0; i < phrases.length; i++)
-        {
-            Collection curMdhCol = iv_firstTokenDictEngine.metaLookup(phrases[i]);
-
-            if (curMdhCol.size() > 0)
-            {
-                mdhCol.addAll(curMdhCol);
-            }
-        }
-        return mdhCol;
-    }
-}
\ No newline at end of file
+public class FirstTokenPermutationImpl implements LookupAlgorithm {
+   // LOG4J logger based on class name
+   final private Logger iv_logger = Logger.getLogger( getClass().getName() );
+
+   /**
+    * Key value for context map. Value is expected to be a List of
+    * LookupAnnotation objects in sorted order.
+    */
+   public static final String CTX_KEY_WINDOW_ANNOTATIONS = "WINDOW_ANNOTATIONS";
+
+   /**
+    * Key value for LookupToken attribute. Value is expected to be either TRUE
+    * or FALSE. This indicates whether to use this token for a "first token"
+    * lookup or not. This is optional.
+    */
+   public static final String LT_KEY_USE_FOR_LOOKUP = "USE_FOR_LOOKUP";
+
+   final private DictionaryEngine iv_firstTokenDictEngine;
+   final private PhraseBuilder iv_phrBuilder;
+
+   final private int iv_maxPermutationLevel;
+   // key = level Integer, value = Permutation list
+   final private Map<Integer, List<List<Integer>>> iv_permCacheMap;
+
+   private String[] iv_textMetaFieldNames;
+
+   /**
+    * Stores the collaborators and pre-computes one permutation list per level, 0 through maxPermutationLevel.
+    *
+    * @param firstTokenDictEngine Dictionary that is indexed against first tokens.
+    * @param phraseBuilder        Builds phrases to match against Dictionary.
+    * @param textMetaFieldNames   MetaFieldNames used to extract presentations; the array is stored as-is, not copied.
+    * @param maxPermutationLevel  Max permutation level allowed; also the HashMap initial capacity, so it must not be negative.
+    */
+   public FirstTokenPermutationImpl( final DictionaryEngine firstTokenDictEngine,
+                                     final PhraseBuilder phraseBuilder,
+                                     final String textMetaFieldNames[],
+                                     final int maxPermutationLevel ) {
+      iv_firstTokenDictEngine = firstTokenDictEngine;
+      iv_phrBuilder = phraseBuilder;
+      iv_textMetaFieldNames = textMetaFieldNames;
+
+      iv_maxPermutationLevel = maxPermutationLevel;
+      iv_permCacheMap = new HashMap<Integer, List<List<Integer>>>( maxPermutationLevel ); // capacity hint only; the loop stores maxPermutationLevel + 1 entries
+      for ( int i = 0; i <= maxPermutationLevel; i++ ) {
+         final List<List<Integer>> permList = PermutationUtil.getPermutationList( i );
+         iv_permCacheMap.put( i, permList ); // int key is autoboxed to Integer
+      }
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<LookupHit> lookup( final List<LookupToken> lookupTokenList,
+                                        final Map<String, List<LookupAnnotation>> contextMap ) throws Exception {
+      // setup optional window context data
+      final List<LookupAnnotation> windowAnnotations = getWindowAnnotations( contextMap );
+      final boolean useWindowAnnots = !windowAnnotations.isEmpty();
+      // map of all the annotation start indices as keys and the annotations with those indices as values
+      final Map<Integer, List<LookupAnnotation>> wStartOffsetMap = getMultipleStartOffsetMap( windowAnnotations );
+      // map of all the annotation end indices as keys and the annotations with those indices as values
+      final Map<Integer, List<LookupAnnotation>> wEndOffsetMap = getMultipleEndOffsetMap( windowAnnotations );
+      // map of all lookupTokens and their index within the lookupTokenList.  Faster for fetching than List.indexOf(..)
+      final Map<LookupToken, Integer> ltListIndexMap = getListIndexMap( lookupTokenList );
+      // map of all the token start indices as keys and the tokens with those indices as values
+      final Map<Integer, List<LookupToken>> ltStartOffsetMap = getMultipleStartOffsetMap( lookupTokenList );
+      // map of all the token end indices as keys and the tokens with those indices as values
+      final Map<Integer, List<LookupToken>> ltEndOffsetMap = getMultipleEndOffsetMap( lookupTokenList );
+
+      final List<LookupHit> lookupHits = new ArrayList<LookupHit>();
+      for ( int currentIndex = 0; currentIndex < lookupTokenList.size(); currentIndex++ ) {
+         final LookupToken lookupToken = lookupTokenList.get( currentIndex );
+         // TODO a bug?  Program flow is possibly not performing as expected.
+         // Boolean valueOf() always returns true or false.  If passed a null parameter it returns false.
+         // two lines down a -null return- is treated the same way as true... are we expecting to
+         // perform a certain way if the attribute does not exist, that being the same behavior
+         // as for the attribute being true?  If so, then this is a bug.
+         // I have left the original code, and refactored to use the actual original behavior,
+         // not the original (possible) expected behavior.  12-26-2012 SPF
+         //         Boolean useForLookup = Boolean.valueOf( lookupToken.getStringAttribute( LT_KEY_USE_FOR_LOOKUP ) );
+         //         if ( (useForLookup == null) || (useForLookup.booleanValue()) ) {
+         final String useForLookupString = lookupToken.getStringAttribute( LT_KEY_USE_FOR_LOOKUP );
+         final boolean useForLookup = Boolean.valueOf( useForLookupString );
+         if ( !useForLookup ) {
+            continue;
+         }
+         final Collection<MetaDataHit> firstTokenHits = getFirstTokenHits( lookupToken );
+         if ( firstTokenHits == null || firstTokenHits.isEmpty() ) {
+            continue;
+         }
+         int wEndOffset = -1;
+         if ( useWindowAnnots ) {
+            // get the largest overlapping window annotation
+            final LookupAnnotation windowAnnotation = getLargestWindowAnnotation( currentIndex, lookupToken,
+                                                                                 ltStartOffsetMap, ltEndOffsetMap,
+                                                                                 ltListIndexMap,
+                                                                                 wStartOffsetMap, wEndOffsetMap );
+            if ( windowAnnotation != null ) {
+               wEndOffset = windowAnnotation.getEndOffset();
+            }
+         }
+         if ( wEndOffset == -1 ) {
+            iv_logger.debug( "Window size set to max perm level." );
+            wEndOffset = getFixedWindowEndOffset( currentIndex, lookupToken, lookupTokenList );
+         }
+         final List<LookupToken> endLookupTokenList = getLookupTokenList( wEndOffset, ltEndOffsetMap, false );
+         if ( endLookupTokenList.isEmpty() ) {
+            iv_logger.debug( "Invalid window:" + currentIndex + "," + wEndOffset );
+            continue;
+         }
+         final LookupToken endLookupToken = endLookupTokenList.get( endLookupTokenList.size() - 1 );
+         final int startTokenIndex = currentIndex;
+         final int endTokenIndex = ltListIndexMap.get( endLookupToken );
+         // list of LookupToken objects bound by the window
+         final List<LookupToken> wLookupTokenList = lookupTokenList.subList( startTokenIndex, endTokenIndex + 1 );
+         // use permutation algorithm to find any hits inside the window
+         // Note: currentIndex - startTokenIndex is always = 0. What was the intention?  12-26-2012 SPF
+         final Collection<LookupHit> lhCol = getLookupHits( firstTokenHits, wLookupTokenList,
+                                                            currentIndex - startTokenIndex );
+         lookupHits.addAll( lhCol );
+      }
+      return lookupHits;
+   }
+
+   /**
+    * Indexes the first-token hits by the lower-cased text of their meta fields.
+    * For every hit, each field named in iv_textMetaFieldNames is read; a non-null value
+    * is lower-cased and used as the key under which the hit is stored.
+    *
+    * @param firstTokenHits hits returned for the first token of a window
+    * @return map of lower-cased meta field text to the hits that carry that text
+    */
+   private Map<String,Set<MetaDataHit>> getNamedMetaDataHits( final Collection<MetaDataHit> firstTokenHits ) {
+      final Map<String,Set<MetaDataHit>> hitsByText = new HashMap<String,Set<MetaDataHit>>();
+      for ( MetaDataHit hit : firstTokenHits ) {
+         for ( String fieldName : iv_textMetaFieldNames ) {
+            final String fieldValue = hit.getMetaFieldValue( fieldName );
+            if ( fieldValue == null ) {
+               // nothing to index for this field
+               if ( iv_logger.isDebugEnabled() ) {
+                  iv_logger.debug( "MetaField " + fieldName + " contains no data." );
+               }
+               continue;
+            }
+            final String key = fieldValue.toLowerCase();
+            Set<MetaDataHit> hitsForKey = hitsByText.get( key );
+            if ( hitsForKey == null ) {
+               // first hit seen for this text: register a fresh set
+               hitsForKey = new HashSet<MetaDataHit>();
+               hitsByText.put( key, hitsForKey );
+            }
+            hitsForKey.add( hit );
+         }
+      }
+      return hitsByText;
+   }
+
+   /**
+    * Builds candidate phrases from the first token plus every cached permutation of the
+    * remaining window tokens, and creates a LookupHit for each first-token hit whose
+    * (lower-cased) meta field text equals one of those phrases.
+    *
+    * @param firstTokenHits   hits found for the first token
+    * @param wLookupTokenList tokens bound by the lookup window
+    * @param firstTokenIndex  index of the first token within wLookupTokenList
+    * @return lookup hits found in the window; an empty collection if the window holds more
+    *         tokens than the permutation cache supports
+    * @throws Exception declared by this method; the phrase builder calls are the visible source
+    */
+   private Collection<LookupHit> getLookupHits( final Collection<MetaDataHit> firstTokenHits,
+                                                final List<LookupToken> wLookupTokenList,
+                                                final int firstTokenIndex ) throws Exception {
+      if ( wLookupTokenList.size() - 1 > iv_maxPermutationLevel ) {
+         iv_logger.debug( "Beyond permutation cache size." );
+         return Collections.emptyList();
+      }
+      final Map<String,Set<MetaDataHit>> namedMetaDataHits = getNamedMetaDataHits( firstTokenHits );
+
+      final List<LookupHit> lookupHits = new ArrayList<LookupHit>();
+      final LookupToken firstWordLookupToken = wLookupTokenList.get( firstTokenIndex );
+      int permutationIndex = wLookupTokenList.size();
+      if ( firstTokenIndex < wLookupTokenList.size() && permutationIndex > 0 ) {
+         permutationIndex--;
+      }
+      final List<List<Integer>> permutationList = iv_permCacheMap.get( permutationIndex );
+      for ( List<Integer> permutations : permutationList ) {
+         // convert permutation idx back into LookupTokens
+         final List<LookupToken> tempLookupTokens = new ArrayList<LookupToken>();
+         for ( Integer idx : permutations ) {
+            if ( idx <= firstTokenIndex ) {
+               idx--;
+            }
+            final LookupToken lookupToken = wLookupTokenList.get( idx );
+            tempLookupTokens.add( lookupToken );
+         }
+
+         // The text span covered by a hit depends only on the permutation, not on the phrase
+         // or the individual hit, so it is computed at most once per permutation (on first match).
+         boolean spanComputed = false;
+         int startOffset = firstWordLookupToken.getStartOffset();
+         int endOffset = firstWordLookupToken.getEndOffset();
+
+         final List<LookupToken> singleTokenList = Arrays.asList( firstWordLookupToken );
+         final String[] firstWordPhrases = iv_phrBuilder.getPhrases( singleTokenList );
+         final String[] lookupTokenPhrases = iv_phrBuilder.getPhrases( tempLookupTokens );
+         for ( String lookupTokenPhrase : lookupTokenPhrases ) {
+            // lower-case here instead of repeating in each inner loop
+            lookupTokenPhrase = lookupTokenPhrase.toLowerCase();
+            for ( String firstWordPhrase : firstWordPhrases ) {
+               // lower-case the part so it isn't done for the whole concatenated string
+               firstWordPhrase = firstWordPhrase.toLowerCase();
+               final StringBuilder phraseSB = new StringBuilder();
+               phraseSB.append( firstWordPhrase ).append( ' ' ).append( lookupTokenPhrase );
+               final String fullPhrase = phraseSB.toString().trim();
+               final Set<MetaDataHit> mdhSet = namedMetaDataHits.get( fullPhrase );
+               if ( mdhSet == null ) {
+                  continue;
+               }
+               if ( !spanComputed ) {
+                  // Sort a private copy: the permutation list belongs to iv_permCacheMap, and
+                  // sorting it in place would destroy the cached ordering for later lookups.
+                  final List<Integer> sortedIndices = new ArrayList<Integer>( permutations );
+                  Collections.sort( sortedIndices );
+                  if ( !sortedIndices.isEmpty() ) {
+                     int firstIdx = sortedIndices.get( 0 );
+                     if ( firstIdx <= firstTokenIndex ) {
+                        firstIdx--;
+                     }
+                     // span starts at whichever of the two tokens begins first
+                     startOffset = Math.min( startOffset, wLookupTokenList.get( firstIdx ).getStartOffset() );
+
+                     int lastIdx = sortedIndices.get( sortedIndices.size() - 1 );
+                     if ( lastIdx <= firstTokenIndex ) {
+                        lastIdx--;
+                     }
+                     // span ends at whichever of the two tokens ends last
+                     endOffset = Math.max( endOffset, wLookupTokenList.get( lastIdx ).getEndOffset() );
+                  }
+                  spanComputed = true;
+               }
+               for ( MetaDataHit mdh : mdhSet ) {
+                  lookupHits.add( new LookupHit( mdh, startOffset, endOffset ) );
+               }
+            }
+         }
+      }
+      return lookupHits;
+   }
+
+   /**
+    * Fetches the window annotations stored in the context map under
+    * CTX_KEY_WINDOW_ANNOTATIONS.
+    *
+    * @param contextMap Map where key=Impl specific String object and value=List of
+    *                   LookupAnnotation objects
+    * @return the stored window annotations, or an empty list if none are present
+    */
+   private List<LookupAnnotation> getWindowAnnotations( final Map<String, List<LookupAnnotation>> contextMap ) {
+      final List<LookupAnnotation> windowAnnotations = contextMap.get( CTX_KEY_WINDOW_ANNOTATIONS );
+      final boolean hasWindows = windowAnnotations != null && !windowAnnotations.isEmpty();
+      if ( hasWindows ) {
+         return windowAnnotations;
+      }
+      iv_logger.debug( "No context window annotations." );
+      return Collections.emptyList();
+   }
+
+   /**
+    * Counts the LookupTokens that lie between the specified start and end offsets,
+    * inclusive of the tokens resolved at both ends.
+    *
+    * @param ltStartOffsetMap lookup tokens keyed by start offset
+    * @param ltEndOffsetMap   lookup tokens keyed by end offset
+    * @param ltListIndexMap   list index of each lookup token
+    * @param startOffset      offset at which the span begins
+    * @param endOffset        offset at which the span ends
+    * @return number of tokens in the span, or -1 if no token could be resolved at either end
+    */
+   private int getNumberOfListTokens( final Map<Integer, List<LookupToken>> ltStartOffsetMap,
+                                      final Map<Integer, List<LookupToken>> ltEndOffsetMap,
+                                      final Map<LookupToken, Integer> ltListIndexMap,
+                                      final int startOffset, final int endOffset ) {
+      final List<LookupToken> tokensAtStart = getLookupTokenList( startOffset, ltStartOffsetMap, true );
+      final List<LookupToken> tokensAtEnd = getLookupTokenList( endOffset, ltEndOffsetMap, false );
+
+      final boolean bothEndsResolved = !tokensAtStart.isEmpty() && !tokensAtEnd.isEmpty();
+      if ( !bothEndsResolved ) {
+         iv_logger.debug( "Invalid window:" + startOffset + "," + endOffset );
+         return -1;
+      }
+      // first token found at the start, last token found at the end
+      final LookupToken firstToken = tokensAtStart.get( 0 );
+      final Integer firstIndex = ltListIndexMap.get( firstToken );
+      final LookupToken lastToken = tokensAtEnd.get( tokensAtEnd.size() - 1 );
+      final Integer lastIndex = ltListIndexMap.get( lastToken );
+
+      return lastIndex - firstIndex + 1;
+   }
+
+   /**
+    * Resolves the LookupTokens registered at an offset. When nothing is registered at
+    * the exact offset, neighbouring offsets are probed one character at a time in the
+    * requested direction, up to a fixed distance.
+    *
+    * @param offset        offset to resolve
+    * @param ltOffsetMap   lookup tokens keyed by offset
+    * @param traverseRight true to probe increasing offsets, false to probe decreasing offsets
+    * @return lookup tokens at the nearest populated offset, or an empty list; never null
+    */
+   private List<LookupToken> getLookupTokenList( final int offset,
+                                                 final Map<Integer, List<LookupToken>> ltOffsetMap,
+                                                 final boolean traverseRight ) {
+      // the exact offset is the common case
+      final List<LookupToken> exactMatch = ltOffsetMap.get( offset );
+      if ( exactMatch != null ) {
+         return exactMatch;
+      }
+      // TODO hardcoded max offset window is 10 char
+      final int offsetWindow = 10;
+      final int step = traverseRight ? 1 : -1;
+      final int lastOffset = offset + step * offsetWindow;
+      for ( int i = offset; traverseRight ? i <= lastOffset : i >= lastOffset; i += step ) {
+         final List<LookupToken> nearbyMatch = ltOffsetMap.get( i );
+         if ( nearbyMatch != null ) {
+            return nearbyMatch;
+         }
+      }
+      // no tokens in window - return an empty list, not null
+      return Collections.emptyList();
+   }
+
+   /**
+    * Finds the longest window annotation that overlaps the specified LookupToken and
+    * whose token count fits within iv_maxPermutationLevel.
+    *
+    * @param tokenIdx         index of the token (not used by this method)
+    * @param lt               token that the window must overlap
+    * @param ltStartOffsetMap lookup tokens keyed by start offset
+    * @param ltEndOffsetMap   lookup tokens keyed by end offset
+    * @param ltListIndexMap   list index of each lookup token
+    * @param wStartOffsetMap  window annotations keyed by start offset
+    * @param wEndOffsetMap    window annotations keyed by end offset
+    * @return the largest usable window annotation, or null if there is none
+    */
+   private LookupAnnotation getLargestWindowAnnotation( final int tokenIdx, final LookupToken lt,
+                                                        final Map<Integer, List<LookupToken>> ltStartOffsetMap,
+                                                        final Map<Integer, List<LookupToken>> ltEndOffsetMap,
+                                                        final Map<LookupToken, Integer> ltListIndexMap,
+                                                        final Map<Integer, List<LookupAnnotation>> wStartOffsetMap,
+                                                        final Map<Integer, List<LookupAnnotation>> wEndOffsetMap ) {
+      final Set<LookupAnnotation> overlapping = new HashSet<LookupAnnotation>();
+      final Set<LookupAnnotation> endingAtOrAfter = new HashSet<LookupAnnotation>();
+
+      // windows that begin at or before the token start
+      for ( Map.Entry<Integer, List<LookupAnnotation>> startEntry : wStartOffsetMap.entrySet() ) {
+         if ( startEntry.getKey() <= lt.getStartOffset() ) {
+            overlapping.addAll( startEntry.getValue() );
+         }
+      }
+      // windows that finish at or after the token end
+      for ( Map.Entry<Integer, List<LookupAnnotation>> endEntry : wEndOffsetMap.entrySet() ) {
+         if ( endEntry.getKey() >= lt.getEndOffset() ) {
+            endingAtOrAfter.addAll( endEntry.getValue() );
+         }
+      }
+      // intersection: only windows satisfying both conditions overlap the LookupToken
+      overlapping.retainAll( endingAtOrAfter );
+
+      LookupAnnotation best = null;
+      for ( LookupAnnotation candidate : overlapping ) {
+         if ( best != null && candidate.getLength() <= best.getLength() ) {
+            // not longer than the current best
+            continue;
+         }
+         // check that the window size can be handled (permutation wise)
+         final int ltCount = getNumberOfListTokens( ltStartOffsetMap, ltEndOffsetMap, ltListIndexMap,
+                                                    candidate.getStartOffset(),
+                                                    candidate.getEndOffset() );
+         if ( ltCount > iv_maxPermutationLevel || ltCount <= 0 ) {
+            if ( iv_logger.isDebugEnabled() ) {
+               iv_logger.debug( "Window size of " + ltCount
+                                + " exceeds the max permutation level of " + iv_maxPermutationLevel + "." );
+            }
+            continue;
+         }
+         best = candidate;
+      }
+      return best;
+   }
+
+   /**
+    * Computes the end offset of a fixed-size window that starts at tokenIdx and spans at
+    * most iv_maxPermutationLevel tokens (clamped to the end of the list).
+    * The window is scanned from its last index backward and the end offset of the first
+    * non-null token is returned. The scan never goes left of tokenIdx, so the result
+    * cannot be the end offset of a token that precedes the window start; this matches
+    * the range of the legacy forward loop, which ran from tokenIdx up to
+    * tokenIdx + iv_maxPermutationLevel and kept the last non-null token's end offset.
+    *
+    * @param tokenIdx index of the token at which the window starts
+    * @param lt       token at tokenIdx (not used by this method)
+    * @param ltList   all lookup tokens
+    * @return end offset of the last non-null token in the window, or 0 if the window is empty
+    */
+   private int getFixedWindowEndOffset( final int tokenIdx, final LookupToken lt, final List<LookupToken> ltList ) {
+      // exclusive upper bound of the window, clamped to the list size
+      final int windowEnd = Math.min( tokenIdx + iv_maxPermutationLevel, ltList.size() );
+      // inclusive lower bound: never scan before the window start (or below index 0)
+      final int windowStart = Math.max( tokenIdx, 0 );
+      // Go backward and return the first valid end offset ...
+      for ( int i = windowEnd - 1; i >= windowStart; i-- ) {
+         final LookupToken tempLookupToken = ltList.get( i );
+         if ( tempLookupToken != null ) {
+            return tempLookupToken.getEndOffset();
+         }
+      }
+      return 0;
+   }
+
+   /**
+    * Builds a reverse index for a list: each element is mapped to its position.
+    * If an element occurs more than once, its last position wins.
+    *
+    * @param list tokens to index
+    * @return map of token to its index in the list
+    */
+   static private Map<LookupToken, Integer> getListIndexMap( final List<LookupToken> list ) {
+      final Map<LookupToken, Integer> indexByToken = new HashMap<LookupToken, Integer>( list.size() );
+      int position = 0;
+      for ( LookupToken token : list ) {
+         indexByToken.put( token, position );
+         position++;
+      }
+      return indexByToken;
+   }
+
+   /**
+    * Indexes LookupAnnotation objects by start offset, one annotation per offset.
+    * When several annotations share a start offset, the one latest in the list is kept.
+    *
+    * @param lookupAnnotList annotations to index
+    * @return map of start offset to lookup annotation
+    */
+   static private <T extends LookupAnnotation> Map<Integer, T> getSingleStartOffsetMap( final List<T> lookupAnnotList ) {
+      final Map<Integer, T> annotationByStart = new HashMap<Integer, T>();
+      for ( T annotation : lookupAnnotList ) {
+         annotationByStart.put( annotation.getStartOffset(), annotation );
+      }
+      return annotationByStart;
+   }
+
+   /**
+    * Groups LookupAnnotation objects by start offset. Annotations sharing a start offset
+    * are collected in list order.
+    *
+    * @param lookupAnnotList annotations to group
+    * @return map of start offset to the lookup annotations beginning there
+    */
+   static private <T extends LookupAnnotation> Map<Integer, List<T>> getMultipleStartOffsetMap( final List<T> lookupAnnotList ) {
+      final Map<Integer, List<T>> annotationsByStart = new HashMap<Integer, List<T>>();
+      for ( T annotation : lookupAnnotList ) {
+         final Integer start = annotation.getStartOffset();
+         List<T> bucket = annotationsByStart.get( start );
+         if ( bucket == null ) {
+            // first annotation at this offset: register its bucket
+            bucket = new ArrayList<T>();
+            annotationsByStart.put( start, bucket );
+         }
+         bucket.add( annotation );
+      }
+      return annotationsByStart;
+   }
+
+   /**
+    * Indexes LookupAnnotation objects by end offset, one annotation per offset.
+    * When several annotations share an end offset, the one latest in the list is kept.
+    *
+    * @param lookupAnnotList annotations to index
+    * @return map of end offset to lookup annotation
+    */
+   static private <T extends LookupAnnotation> Map<Integer, T> getSingleEndOffsetMap( final List<T> lookupAnnotList ) {
+      final Map<Integer, T> annotationByEnd = new HashMap<Integer, T>();
+      for ( T annotation : lookupAnnotList ) {
+         annotationByEnd.put( annotation.getEndOffset(), annotation );
+      }
+      return annotationByEnd;
+   }
+
+   /**
+    * Groups LookupAnnotation objects by end offset. Annotations sharing an end offset
+    * are collected in list order.
+    *
+    * @param lookupAnnotList annotations to group
+    * @return map of end offset to the lookup annotations finishing there
+    */
+   static private <T extends LookupAnnotation> Map<Integer, List<T>> getMultipleEndOffsetMap( final List<T> lookupAnnotList ) {
+      final Map<Integer, List<T>> annotationsByEnd = new HashMap<Integer, List<T>>();
+      for ( T annotation : lookupAnnotList ) {
+         final Integer end = annotation.getEndOffset();
+         List<T> bucket = annotationsByEnd.get( end );
+         if ( bucket == null ) {
+            // first annotation at this offset: register its bucket
+            bucket = new ArrayList<T>();
+            annotationsByEnd.put( end, bucket );
+         }
+         bucket.add( annotation );
+      }
+      return annotationsByEnd;
+   }
+
+
+   /**
+    * Looks up the specified LookupToken in the first-token dictionary. Every phrase the
+    * phrase builder produces for the single token is looked up, and all resulting hits
+    * are gathered into one collection.
+    *
+    * @param firstLookupToken token to look up
+    * @return all hits for the token's phrases; empty if there are none
+    * @throws Exception declared by this method; the phrase builder and dictionary calls are the visible sources
+    */
+   private Collection<MetaDataHit> getFirstTokenHits( final LookupToken firstLookupToken ) throws Exception {
+      final List<LookupToken> tokenAsList = Arrays.asList( firstLookupToken );
+      final String[] tokenPhrases = iv_phrBuilder.getPhrases( tokenAsList );
+      final List<MetaDataHit> allHits = new ArrayList<MetaDataHit>();
+      for ( String tokenPhrase : tokenPhrases ) {
+         final Collection<MetaDataHit> hitsForPhrase = iv_firstTokenDictEngine.metaLookup( tokenPhrase );
+         if ( hitsForPhrase.isEmpty() ) {
+            continue;
+         }
+         allHits.addAll( hitsForPhrase );
+      }
+      return allHits;
+   }
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/LookupAlgorithm.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/LookupAlgorithm.java?rev=1446793&r1=1446792&r2=1446793&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/LookupAlgorithm.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/LookupAlgorithm.java Fri Feb 15 22:26:53 2013
@@ -18,6 +18,10 @@
  */
 package org.apache.ctakes.dictionary.lookup.algorithms;
 
+import org.apache.ctakes.dictionary.lookup.vo.LookupAnnotation;
+import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
+import org.apache.ctakes.dictionary.lookup.vo.LookupToken;
+
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -41,6 +45,6 @@ public interface LookupAlgorithm
      * @return Collection of LookupHits.
      * @throws Exception
      */
-    public Collection lookup(List lookupTokenList, Map contextMap)
-            throws Exception;
+    public Collection<LookupHit> lookup( List<LookupToken> lookupTokenList,
+                                         Map<String,List<LookupAnnotation>> contextMap) throws Exception;
 }



Mime
View raw message