incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1449951 [1/2] - in /incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup: ./ ae/ algorithms/ filter/ lucene/ phrasebuilder/ strtable/ vo/
Date Mon, 25 Feb 2013 22:50:43 GMT
Author: seanfinan
Date: Mon Feb 25 22:50:42 2013
New Revision: 1449951

URL: http://svn.apache.org/r1449951
Log:
CTAKES-159 : Added some typing
CTAKES-160 : Refined some Exception try/catch and throws
CTAKES-161 : modernizing for jdk 1.5+
Some improvements to the dictionary-lookup led to refactoring
Some faster consumer implementations (pulled redundancies out of inner iterations)

Added:
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java
      - copied, changed from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java
      - copied, changed from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java
Modified:
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedDbConsumerImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/FirstTokenPermutationImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/filter/StringPreLookupFilterImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/lucene/LuceneDocumentMetaDataHitImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/phrasebuilder/VariantPhraseBuilderImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/strtable/StringTableRowMetaDataHitImpl.java
    incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/vo/LookupHit.java

Copied: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java (from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java?p2=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java&p1=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java&r1=1449821&r2=1449951&rev=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java Mon Feb 25 22:50:42 2013
@@ -25,41 +25,20 @@ import java.util.Collection;
  * 
  * @author Mayo Clinic
  */
-public abstract class BaseMetaDataHitImpl implements MetaDataHit
-{
-    /**
-     * Two MetaDataHits are equal if their Meta field name/value pairs
-     * are equal.
-     */
-    // In that case, this code is broken.  Note that this can contain all of those, but that may not contain all of these
-//    public boolean equals(MetaDataHit mdh)
-//    {
-//        // check names first
-//        if (getMetaFieldNames().containsAll(mdh.getMetaFieldNames()))
-//        {
-//            // check values
-//            if (getMetaFieldValues().containsAll(mdh.getMetaFieldValues()))
-//            {
-//                return true;
-//            }
-//        }
-//
-//        return false;
-//    }
+public abstract class AbstractBaseMetaDataHit implements MetaDataHit {
+   private int _hashCode = Integer.MIN_VALUE;
 
    /**
     * Two MetaDataHits are equal if their Meta field name/value pairs
     * are equal.
     */
     public boolean equals( final MetaDataHit mdh ) {
-       // Still not great as two equal names could have swapped equal values, but fast if complete check isn't required
        if ( getMetaFieldNames().size() != mdh.getMetaFieldNames().size()
              || getMetaFieldValues().size() != mdh.getMetaFieldValues().size()
-             // TODO add types to MetaDataHit
              || !getMetaFieldNames().containsAll( mdh.getMetaFieldNames() ) ) {
           return false;
        }
-       final Collection<String> thisMetaFieldNames = (Collection<String>)getMetaFieldNames();
+       final Collection<String> thisMetaFieldNames = getMetaFieldNames();
        for ( String name : thisMetaFieldNames ) {
           if ( !getMetaFieldValue( name ).equals( mdh.getMetaFieldValue( name ) ) ) {
              return false;
@@ -68,12 +47,7 @@ public abstract class BaseMetaDataHitImp
        return true;
     }
 
-
    // Added 12-17-2012 to increase duplicate filtering in DictionaryLookupAnnotator
-   // TODO As far as I have seen, instances of MetaDataHit are immutable (and should be so annotated)
-   // If MetaDataHit ever becomes mutable then the hashCode may need to be reset upon mutation
-   private int _hashCode = Integer.MIN_VALUE;
-
    @Override
    public int hashCode() {
       if ( _hashCode == Integer.MIN_VALUE ) {

Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java?rev=1449951&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java (added)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java Mon Feb 25 22:50:42 2013
@@ -0,0 +1,87 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.dictionary.lookup;
+
+import java.util.Collection;
+
+/**
+ * Base impl for a MetaDataHit implementation.
+ * 
+ * @author Mayo Clinic
+ * @deprecated please use {@link AbstractBaseMetaDataHit}
+ */
+@Deprecated
+// Renamed to AbstractBaseMetaDataHit, as this is not a full implementation. - 2/25/2013 SPF
+public abstract class BaseMetaDataHitImpl implements MetaDataHit {
+    /**
+     * Two MetaDataHits are equal if their Meta field name/value pairs
+     * are equal.
+     */
+    // In that case, this code is broken.  Note that this can contain all of those, but that may not contain all of these
+//    public boolean equals(MetaDataHit mdh)
+//    {
+//        // check names first
+//        if (getMetaFieldNames().containsAll(mdh.getMetaFieldNames()))
+//        {
+//            // check values
+//            if (getMetaFieldValues().containsAll(mdh.getMetaFieldValues()))
+//            {
+//                return true;
+//            }
+//        }
+//
+//        return false;
+//    }
+
+   /**
+    * Two MetaDataHits are equal if their Meta field name/value pairs
+    * are equal.
+    */
+    public boolean equals( final MetaDataHit mdh ) {
+       // Still not great as two equal names could have swapped equal values, but fast if complete check isn't required
+       if ( getMetaFieldNames().size() != mdh.getMetaFieldNames().size()
+             || getMetaFieldValues().size() != mdh.getMetaFieldValues().size()
+             // TODO add types to MetaDataHit
+             || !getMetaFieldNames().containsAll( mdh.getMetaFieldNames() ) ) {
+          return false;
+       }
+       final Collection<String> thisMetaFieldNames = (Collection<String>)getMetaFieldNames();
+       for ( String name : thisMetaFieldNames ) {
+          if ( !getMetaFieldValue( name ).equals( mdh.getMetaFieldValue( name ) ) ) {
+             return false;
+          }
+       }
+       return true;
+    }
+
+
+   // Added 12-17-2012 to increase duplicate filtering in DictionaryLookupAnnotator
+   // TODO As far as I have seen, instances of MetaDataHit are immutable (and should be so annotated)
+   // If MetaDataHit ever becomes mutable then the hashCode may need to be reset upon mutation
+   private int _hashCode = Integer.MIN_VALUE;
+
+   @Override
+   public int hashCode() {
+      if ( _hashCode == Integer.MIN_VALUE ) {
+         _hashCode = 27 * getMetaFieldNames().hashCode() + getMetaFieldValues().hashCode();
+      }
+      return _hashCode;
+   }
+
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java Mon Feb 25 22:50:42 2013
@@ -18,36 +18,44 @@
  */
 package org.apache.ctakes.dictionary.lookup;
 
+import javax.annotation.concurrent.Immutable;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.Map;
 import java.util.Set;
 
 /**
- * 
  * @author Mayo Clinic
  */
-public class GenericMetaDataHitImpl extends BaseMetaDataHitImpl
-        implements MetaDataHit
-{
-    private Map iv_nameValueMap;
-
-    public GenericMetaDataHitImpl(Map metaNameValueMap)
-    {
-        iv_nameValueMap = metaNameValueMap;
-    }
-
-    public String getMetaFieldValue(String metaFieldName)
-    {
-        return (String) iv_nameValueMap.get(metaFieldName);
-    }
-
-    public Set getMetaFieldNames()
-    {
-        return iv_nameValueMap.keySet();
-    }
-
-    public Collection getMetaFieldValues()
-    {
-        return iv_nameValueMap.values();
-    }
-}
\ No newline at end of file
+@Immutable
+public final class GenericMetaDataHitImpl extends AbstractBaseMetaDataHit implements MetaDataHit {
+   private final Map<String, String> _nameValueMap;
+
+   public GenericMetaDataHitImpl( final Map<String,String> metaNameValueMap ) {
+      _nameValueMap = Collections.unmodifiableMap( metaNameValueMap );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getMetaFieldValue( final String metaFieldName ) {
+      return _nameValueMap.get( metaFieldName );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Set<String> getMetaFieldNames() {
+      return _nameValueMap.keySet();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public Collection<String> getMetaFieldValues() {
+      return _nameValueMap.values();
+   }
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java Mon Feb 25 22:50:42 2013
@@ -27,11 +27,11 @@ import java.util.Set;
  */
 public interface MetaDataHit
 {
-    public Set getMetaFieldNames();
+    public Set<String> getMetaFieldNames();
     
-    public Collection getMetaFieldValues();
+    public Collection<String> getMetaFieldValues();
 
     public String getMetaFieldValue(String metaFieldName);
 	
-	public boolean equals(MetaDataHit mdh);
+	 public boolean equals(MetaDataHit mdh);
 }

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java Mon Feb 25 22:50:42 2013
@@ -18,68 +18,71 @@
  */
 package org.apache.ctakes.dictionary.lookup.ae;
 
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
 import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.jcas.JCas;
-
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+
+import java.util.*;
+
 
 /**
  * Provides some base functionality for subclasses.
- * 
+ *
  * @author Mayo Clinic
- * 
  */
-public abstract class BaseLookupConsumerImpl implements LookupConsumer
-{
-	/**
-	 * Organizes the LookupHit objects by begin and end offsets.
-	 * 
-	 * @param lhItr
-	 * @return Iterator over Set objects. Each Set object is a collection of
-	 *         LookupHit objects with the same begin,end offsets.
-	 */
-	protected Iterator organizeByOffset(Iterator lhItr)
-	{
-		// key = begin,end key (java.lang.String)
-		// val = Set of LookupHit objects corresponding to begin,end
-		Map m = new HashMap();
-
-		while (lhItr.hasNext())
-		{
-			LookupHit lh = (LookupHit) lhItr.next();
-			String keyStr = getKeyString(lh.getStartOffset(), lh.getEndOffset());
-
-			Set s = null;
-			if (m.containsKey(keyStr))
-			{
-				s = (Set) m.get(keyStr);
-			}
-			else
-			{
-				s = new HashSet();
-			}
-			s.add(lh);
-			m.put(keyStr, s);
-		}
-
-		return m.values().iterator();
-	}
-
-	private String getKeyString(int begin, int end)
-	{
-		StringBuffer sb = new StringBuffer();
-		sb.append(begin);
-		sb.append(',');
-		sb.append(end);
-		return sb.toString();
-	}
+// TODO rename this class properly: AbstractBaseLookupConsumer.  Requires refactoring outside module
+public abstract class BaseLookupConsumerImpl implements LookupConsumer {
+   /**
+    * Organizes the LookupHit objects by begin and end offsets.
+    *
+    * @param lookupHitIterator Iterator over the LookupHit objects to organize
+    * @return Iterator over Set objects. Each Set object is a collection of
+    *         LookupHit objects with the same begin,end offsets.
+    */
+   static protected Iterator organizeByOffset( final Iterator<LookupHit> lookupHitIterator ) {
+      final  Map<LookupHitKey, Set<LookupHit>> lookupHitMap = createLookupHitMap( lookupHitIterator );
+      return lookupHitMap.values().iterator();
+   }
+
+   static protected Map<LookupHitKey, Set<LookupHit>> createLookupHitMap( final Iterator<LookupHit> lookupHitIterator ) {
+      final Map<LookupHitKey, Set<LookupHit>> lookupHitMap = new HashMap<LookupHitKey, Set<LookupHit>>();
+      while ( lookupHitIterator.hasNext() ) {
+         final LookupHit lookupHit = lookupHitIterator.next();
+         final LookupHitKey key = new LookupHitKey( lookupHit );
+         Set<LookupHit> lookupHits = lookupHitMap.get( key );
+         if ( lookupHits == null ) {
+            lookupHits = new HashSet<LookupHit>();
+            lookupHitMap.put( key, lookupHits );
+         }
+         lookupHits.add( lookupHit );
+      }
+      return lookupHitMap;
+   }
+
+   /**
+    * Using a String as a HashMap Key can be slow as
+    * the hashCode is computed per character with each call - ditto for equals
+    */
+   static protected class LookupHitKey {
+      final protected int __start;
+      final protected int __end;
+      final private int __hashCode;
+
+      private LookupHitKey( final LookupHit lookupHit ) {
+         __start = lookupHit.getStartOffset();
+         __end = lookupHit.getEndOffset();
+         __hashCode = 1000 * __end + __start;
+      }
+
+      public int hashCode() {
+         return __hashCode;
+      }
+
+      public boolean equals( final Object object ) {
+         return object instanceof LookupHitKey
+               && __start == ((LookupHitKey) object).__start
+               && __end == ((LookupHitKey) object).__end;
+      }
+   }
 
-	public abstract void consumeHits(JCas jcas, Iterator lookupHitItr)
-			throws AnalysisEngineProcessException;
 }

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java Mon Feb 25 22:50:42 2013
@@ -18,56 +18,72 @@
  */
 package org.apache.ctakes.dictionary.lookup.ae;
 
-import java.util.HashMap;
-import java.util.Map;
-
 import org.apache.ctakes.dictionary.lookup.vo.LookupAnnotation;
 import org.apache.ctakes.dictionary.lookup.vo.LookupToken;
 import org.apache.uima.jcas.tcas.Annotation;
 
+import java.util.HashMap;
+import java.util.Map;
+
 
 /**
  * @author Mayo Clinic
- * 
  */
-public class LookupAnnotationToJCasAdapter implements LookupAnnotation, LookupToken
-{
-    private Map iv_attrMap = new HashMap();
-
-    private Annotation iv_jcasAnnotObj;
-
-    public LookupAnnotationToJCasAdapter(Annotation jcasAnnotObj)
-    {
-        iv_jcasAnnotObj = jcasAnnotObj;
-    }
-
-    public void addStringAttribute(String attrKey, String attrVal)
-    {
-        iv_attrMap.put(attrKey, attrVal);
-    }
-
-    public int getEndOffset()
-    {
-        return iv_jcasAnnotObj.getEnd();
-    }
-
-    public int getLength()
-    {
-        return getStartOffset() - getEndOffset();
-    }
-
-    public int getStartOffset()
-    {
-        return iv_jcasAnnotObj.getBegin();
-    }
-
-    public String getStringAttribute(String attrKey)
-    {
-        return (String) iv_attrMap.get(attrKey);
-    }
-
-    public String getText()
-    {
-        return iv_jcasAnnotObj.getCoveredText();
-    }
+public class LookupAnnotationToJCasAdapter implements LookupAnnotation, LookupToken {
+
+   final private Map<String, String> _attributeMap;
+   final private Annotation _jcasAnnotation;
+
+   public LookupAnnotationToJCasAdapter( final Annotation jcasAnnotation ) {
+      _jcasAnnotation = jcasAnnotation;
+      _attributeMap = new HashMap<String, String>();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void addStringAttribute( final String attrKey, final String attrVal ) {
+      _attributeMap.put( attrKey, attrVal );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getEndOffset() {
+      return _jcasAnnotation.getEnd();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getLength() {
+      return getStartOffset() - getEndOffset();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public int getStartOffset() {
+      return _jcasAnnotation.getBegin();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getStringAttribute( final String attrKey ) {
+      return _attributeMap.get( attrKey );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public String getText() {
+      return _jcasAnnotation.getCoveredText();
+   }
 }

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java Mon Feb 25 22:50:42 2013
@@ -20,8 +20,8 @@ package org.apache.ctakes.dictionary.loo
 
 import java.util.Iterator;
 
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
+import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 
 /**
@@ -38,13 +38,12 @@ public interface LookupConsumer
 	/**
 	 * Consumes the hits produced by the LookupAnnotator. This typically means
 	 * iterating over the hits and storing what's necessary to the JCas
-	 * @param jcas
-	 *            CAS for storing data
+	 * @param jcas CAS for storing data
 	 * @param lookupHitItr
 	 *            Iterator over LookupHit objects. These objects contain data
 	 *            about the annotation span plus any associated metadata.
-	 * @throws AnnotatorProcessException
+	 * @throws AnalysisEngineProcessException
 	 */
-	public void consumeHits(JCas jcas, Iterator lookupHitItr)
+	public void consumeHits(JCas jcas, Iterator<LookupHit> lookupHitItr)
 			throws AnalysisEngineProcessException;
 }

Copied: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java (from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java)
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java?p2=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java&p1=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java&r1=1449821&r2=1449951&rev=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java Mon Feb 25 22:50:42 2013
@@ -18,20 +18,6 @@
  */
 package org.apache.ctakes.dictionary.lookup.ae;
 
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.sql.Connection;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Properties;
-import java.util.Set;
-import java.util.StringTokenizer;
-
 import org.apache.ctakes.core.resource.FileResource;
 import org.apache.ctakes.core.resource.JdbcConnectionResource;
 import org.apache.ctakes.core.resource.LuceneIndexReaderResource;
@@ -44,269 +30,285 @@ import org.apache.ctakes.dictionary.look
 import org.apache.ctakes.dictionary.lookup.strtable.StringTable;
 import org.apache.ctakes.dictionary.lookup.strtable.StringTableDictionaryImpl;
 import org.apache.ctakes.dictionary.lookup.strtable.StringTableFactory;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
-import org.jdom.Document;
-import org.jdom.Element;
-import org.jdom.JDOMException;
-import org.jdom.input.SAXBuilder;
-
+import org.apache.log4j.Logger;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
+import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
+import org.apache.uima.resource.ResourceAccessException;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.sql.Connection;
+import java.util.*;
+
+
+// TODO Finish this refactor
 
 /**
  * @author Mayo Clinic
  */
-public class LookupParseUtilities
-{
-	//returns a set of LookupSpec objects
-	public static Set parseDescriptor(File descFile, UimaContext aContext, int maxListSize)
-			throws JDOMException, IOException, Exception
-	{
-		SAXBuilder saxBuilder = new SAXBuilder();
-		Document doc = saxBuilder.build(descFile);
-		maxSizeList = maxListSize;	//ohnlp-Bugs-3296301 fixes limit the search results to fixed 100 records.
-		Map dictMap = parseDictionaries(aContext, doc.getRootElement().getChild(
-				"dictionaries"));
-		//ohnlp-Bugs-3296301
-		return parseLookupBindingXml(aContext, dictMap, doc.getRootElement().getChild("lookupBindings"));
-	}
-
-	public static Set parseDescriptor(File descFile, UimaContext aContext)
-	throws JDOMException, IOException, Exception
-	{
-		SAXBuilder saxBuilder = new SAXBuilder();
-		Document doc = saxBuilder.build(descFile);
-		Map dictMap = parseDictionaries(aContext, doc.getRootElement().getChild(
-		"dictionaries"));
-		//ohnlp-Bugs-3296301
-		return parseLookupBindingXml(aContext, dictMap, doc.getRootElement().getChild("lookupBindings"));
-	}
-	private static Map parseDictionaries(UimaContext aContext,
-			Element dictetteersEl) throws AnnotatorContextException, Exception
-	{
-		Map m = new HashMap();
-		Iterator dictItr = dictetteersEl.getChildren().iterator();
-		while (dictItr.hasNext())
-		{
-			Element dictEl = (Element) dictItr.next();
-			String id = dictEl.getAttributeValue("id");
-			DictionaryEngine dictEngine = LookupParseUtilities.parseDictionaryXml(
-					aContext,
-					dictEl);
-			m.put(id, dictEngine);
-		}
-		return m;
-	}
-
-	private static DictionaryEngine parseDictionaryXml(UimaContext annotCtx,
-			Element rootDictEl) throws AnnotatorContextException, Exception
-	{
-		String extResrcKey = rootDictEl.getAttributeValue("externalResourceKey");
-		Boolean keepCase = new Boolean(rootDictEl.getAttributeValue("caseSensitive"));
-		Object extResrc = annotCtx.getResourceObject(extResrcKey);
-		if (extResrc == null)
-		{
-			throw new Exception("Unable to find external resource with key:"
-					+ extResrcKey);
-		}
-
-		Element lookupFieldEl = rootDictEl.getChild("lookupField");
-		String lookupFieldName = lookupFieldEl.getAttributeValue("fieldName");
-
-		Dictionary dict;
-
-		Element implEl = (Element) rootDictEl.getChild("implementation")
-				.getChildren()
-				.get(0);
-		String implType = implEl.getName();
-		if (implType.equals("luceneImpl"))
-		{
-			if (!(extResrc instanceof LuceneIndexReaderResource))
-			{
-				throw new Exception("Expected external resource to be:"
-						+ LuceneIndexReaderResource.class);
-			}
-			IndexReader indexReader = ((LuceneIndexReaderResource) extResrc).getIndexReader();
-			IndexSearcher indexSearcher = new IndexSearcher(indexReader);
-			// Added 'MaxListSize' ohnlp-Bugs-3296301
-			dict = new LuceneDictionaryImpl(indexSearcher, lookupFieldName, maxSizeList);
-		}
-		else if (implType.equals("jdbcImpl"))
-		{
-			String tableName = implEl.getAttributeValue("tableName");
-			if (!(extResrc instanceof JdbcConnectionResource))
-			{
-				throw new Exception("Expected external resource to be:"
-						+ JdbcConnectionResource.class);
-			}
-			Connection conn = ((JdbcConnectionResource) extResrc).getConnection();
-			dict = new JdbcDictionaryImpl(conn, tableName, lookupFieldName);
-		}
-        else if (implType.equals("csvImpl"))
-        {
-            String fieldDelimiter = implEl.getAttributeValue("delimiter");            
-            if (!(extResrc instanceof FileResource))
-            {
-                throw new Exception("Expected external resource to be:"
-                        + FileResource.class);
+final public class LookupParseUtilitiesRefactor {
+
+   static private final Logger CLASS_LOGGER = Logger.getLogger( LookupParseUtilitiesRefactor.class );
+
+   private LookupParseUtilitiesRefactor() {}
+
+   /**
+    * Parses a dictionary lookup descriptor file into a set of LookupSpec objects.
+    *
+    * @param descFile    xml descriptor whose root element has "dictionaries" and "lookupBindings" children
+    * @param aContext    uima context passed on to the dictionary and lookup binding parsers
+    * @param maxListSize maximum list size; stored in MAX_LIST_SIZE, which the lucene dictionary and
+    *                    the three-argument lookup consumer constructor receive
+    * @return a set of LookupSpec objects, one for each lookup binding in the descriptor
+    * @throws JDOMException             declared; presumably raised when the descriptor xml cannot be parsed
+    * @throws IOException               declared; presumably raised when the descriptor file cannot be read
+    * @throws AnnotatorContextException propagated from the dictionary and lookup binding parsers
+    * @throws ResourceAccessException   propagated from the dictionary parser
+    */
+   public static Set<LookupSpec> parseDescriptor( final File descFile, final UimaContext aContext, final int maxListSize )
+         throws JDOMException, IOException, AnnotatorContextException, ResourceAccessException {
+      final Document descriptor = new SAXBuilder().build( descFile );
+      MAX_LIST_SIZE = maxListSize;   //ohnlp-Bugs-3296301 fixes limit the search results to fixed 100 records.
+      final Element dictionariesEl = descriptor.getRootElement().getChild( "dictionaries" );
+      final Map<String,DictionaryEngine> enginesById = parseDictionaries( aContext, dictionariesEl );
+      //ohnlp-Bugs-3296301
+      final Element bindingsEl = descriptor.getRootElement().getChild( "lookupBindings" );
+      return parseLookupBindingXml( aContext, enginesById, bindingsEl );
+   }
+
+   /**
+    * Parses a dictionary lookup descriptor file without limiting the maximum list size.
+    * Delegates to the three-argument overload with Integer.MAX_VALUE as the limit.
+    *
+    * @param descFile xml descriptor file
+    * @param aContext uima context passed on to the parsers
+    * @return a set of LookupSpec objects
+    */
+   public static Set<LookupSpec> parseDescriptor( final File descFile, final UimaContext aContext )
+         throws JDOMException, IOException, AnnotatorContextException, ResourceAccessException {
+      final int unlimitedListSize = Integer.MAX_VALUE;
+      return parseDescriptor( descFile, aContext, unlimitedListSize );
+   }
+
+   /**
+    * Builds one dictionary engine per child of the given element, keyed by the child's "id" attribute.
+    *
+    * @param aContext      uima context passed on to parseDictionaryXml
+    * @param dictetteersEl element whose children each describe a dictionary
+    * @return map of dictionary id to dictionary engine
+    */
+   private static Map<String,DictionaryEngine> parseDictionaries( final UimaContext aContext,
+                                                                  final Element dictetteersEl )
+         throws AnnotatorContextException, ResourceAccessException {
+      final Map<String,DictionaryEngine> enginesById = new HashMap<String,DictionaryEngine>();
+      for ( Object child : dictetteersEl.getChildren() ) {
+         final Element dictionaryEl = (Element) child;
+         enginesById.put( dictionaryEl.getAttributeValue( "id" ), parseDictionaryXml( aContext, dictionaryEl ) );
+      }
+      return enginesById;
+   }
+
+   /**
+    * Creates a dictionary engine from a single dictionary element of the lookup descriptor.
+    * The "externalResourceKey" attribute names the uima external resource backing the dictionary, and the
+    * first child of the "implementation" element selects the dictionary type:
+    * "luceneImpl", "jdbcImpl" or "csvImpl".
+    *
+    * @param annotCtx   uima context used to fetch the external resource
+    * @param rootDictEl dictionary element with "lookupField", "implementation" and "metaFields" children;
+    *                   the "caseSensitive" attribute (false when absent) and "excludeList" child are optional
+    * @return engine wrapping the configured dictionary, with any exclude list installed as a pre-lookup filter
+    * @throws AnnotatorContextException declared in the signature; not thrown directly by the code in this method
+    * @throws ResourceAccessException   if the external resource is missing or of the wrong type, the implementation
+    *                                   type is unsupported, the csv file cannot be read, or a required child of the
+    *                                   implementation / metaFields section is absent
+    */
+   private static DictionaryEngine parseDictionaryXml( final UimaContext annotCtx, final Element rootDictEl )
+         throws AnnotatorContextException, ResourceAccessException {
+      final String extResrcKey = rootDictEl.getAttributeValue( "externalResourceKey" );
+      // UimaContext.getResourceObject(..) throws ResourceAccessException
+      final Object extResrc = annotCtx.getResourceObject( extResrcKey );
+      if ( extResrc == null ) {
+         throw new ResourceAccessException( "Unable to find external resource with key:" + extResrcKey, null );
+      }
+
+      final Element lookupFieldEl = rootDictEl.getChild( "lookupField" );
+      final String lookupFieldName = lookupFieldEl.getAttributeValue( "fieldName" );
+
+      Dictionary dict;
+      try {
+         if (rootDictEl.getChild( "implementation" ).getChildren().isEmpty() ) {
+            throw new ResourceAccessException( new IndexOutOfBoundsException() );
+         }
+         final Element implEl = (Element) rootDictEl.getChild( "implementation" ).getChildren().get( 0 );
+         final String implType = implEl.getName();
+         if ( implType.equals( "luceneImpl" ) ) {
+            if ( !(extResrc instanceof LuceneIndexReaderResource) ) {
+               throw new ResourceAccessException( "Expected external resource to be:"
+                                          + LuceneIndexReaderResource.class, new Object[]{extResrc} );
+            }
+            final IndexReader indexReader = ((LuceneIndexReaderResource) extResrc).getIndexReader();
+            final IndexSearcher indexSearcher = new IndexSearcher( indexReader );
+            // Added 'MaxListSize' ohnlp-Bugs-3296301
+            dict = new LuceneDictionaryImpl( indexSearcher, lookupFieldName, MAX_LIST_SIZE );
+         } else if ( implType.equals( "jdbcImpl" ) ) {
+            final String tableName = implEl.getAttributeValue( "tableName" );
+            if ( !(extResrc instanceof JdbcConnectionResource) ) {
+               throw new ResourceAccessException( "Expected external resource to be:"
+                                          + JdbcConnectionResource.class, new Object[]{extResrc} );
+            }
+            final Connection conn = ((JdbcConnectionResource) extResrc).getConnection();
+            dict = new JdbcDictionaryImpl( conn, tableName, lookupFieldName );
+         } else if ( implType.equals( "csvImpl" ) ) {
+            final String fieldDelimiter = implEl.getAttributeValue( "delimiter" );
+            if ( !(extResrc instanceof FileResource) ) {
+               throw new ResourceAccessException( "Expected external resource to be:"
+                                          + FileResource.class, new Object[]{extResrc} );
             }
 
-            String idxFieldNameStr = implEl.getAttributeValue("indexedFieldNames");
-            StringTokenizer st = new StringTokenizer(idxFieldNameStr, ",");
+            final String idxFieldNameStr = implEl.getAttributeValue( "indexedFieldNames" );
+            final StringTokenizer st = new StringTokenizer( idxFieldNameStr, "," );
             int arrIdx = 0;
             String[] idxFieldNameArr = new String[st.countTokens()];
-            while (st.hasMoreTokens())
-            {
-                idxFieldNameArr[arrIdx++] = st.nextToken().trim();
+            while ( st.hasMoreTokens() ) {
+               idxFieldNameArr[arrIdx++] = st.nextToken().trim();
+            }
+
+            final File csvFile = ((FileResource) extResrc).getFile();
+            try {
+               // NOTE(review): the FileReader is never closed here - confirm whether StringTableFactory.build closes it
+               final StringTable strTable = StringTableFactory.build( new FileReader( csvFile ),
+                     fieldDelimiter, idxFieldNameArr, true );
+               dict = new StringTableDictionaryImpl( strTable, lookupFieldName );
+            } catch ( IOException ioE ) {
+               // also covers FileNotFoundException; pass the cause along so the underlying i/o failure is not lost
+               throw new ResourceAccessException( "Could not open csv file", new Object[]{csvFile}, ioE );
+            }
+         } else {
+            throw new ResourceAccessException( "Unsupported impl type:" + implType, new Object[]{implType} );
+         }
+
+         final List<Element> rootDictChildren = rootDictEl.getChild( "metaFields" ).getChildren();
+         for ( Element metaFieldEl : rootDictChildren ) {
+            final String metaFieldName = metaFieldEl.getAttributeValue( "fieldName" );
+            dict.retainMetaData( metaFieldName );
+         }
+      } catch ( NullPointerException npE ) {
+         // thrown all over this method ...
+         throw new ResourceAccessException( npE );
+      }
+      final boolean keepCase = Boolean.parseBoolean( rootDictEl.getAttributeValue( "caseSensitive" ) );
+      final DictionaryEngine dictEngine = new DictionaryEngine( dict, keepCase );
+      final Element excludeList = rootDictEl.getChild( "excludeList" );
+      if ( excludeList != null && excludeList.getChildren() != null && !excludeList.getChildren().isEmpty() ) {
+         addExcludeList( dictEngine, excludeList.getChildren() );
+      }
+      return dictEngine;
+   }
+
+
+   /**
+    * Installs a pre-lookup filter on the dictionary engine that holds the "value" attribute of each given element.
+    * Word(s) not to look up
+    * TODO Consider adding common words as possible performance improvement
+    *
+    * @param dictionaryEngine engine that receives the filter
+    * @param elementList      elements whose "value" attributes are the excluded values
+    */
+   private static void addExcludeList( final DictionaryEngine dictionaryEngine, final List<Element> elementList ) {
+      final Set<String> excluded = new HashSet<String>( elementList.size() );
+      final Iterator<Element> elementItr = elementList.iterator();
+      while ( elementItr.hasNext() ) {
+         final String value = elementItr.next().getAttributeValue( "value" );
+         CLASS_LOGGER.info( "Adding exclude value[" + value + "]" );
+         excluded.add( value );
+      }
+      dictionaryEngine.addPreLookupFilter( new StringPreLookupFilterImpl( excluded ) );
+   }
+
+
+   /**
+    * Creates a LookupSpec for each child of the lookup bindings element.
+    * Each binding names a dictionary ("dictionaryRef"), a LookupInitializer class ("lookupInitializer") and a
+    * LookupConsumer class ("lookupConsumer"); initializer and consumer are instantiated by reflection.
+    * The consumer class needs a public constructor taking (UimaContext, Properties, int) or
+    * (UimaContext, Properties); the int argument receives MAX_LIST_SIZE.
+    *
+    * @param annotCtx         uima context handed to the initializer and consumer constructors
+    * @param dictMap          dictionary engines keyed by dictionary id
+    * @param lookupBindingsEl element whose children are the lookup bindings
+    * @return a set with one LookupSpec per binding
+    * @throws AnnotatorContextException if a binding references an undefined dictionary, a class cannot be found
+    *                                   or instantiated, or a required element is missing
+    */
+   private static Set<LookupSpec> parseLookupBindingXml( final UimaContext annotCtx,
+                                                         final Map<String,DictionaryEngine> dictMap,
+                                                         final Element lookupBindingsEl )
+         throws AnnotatorContextException {
+      final Class[] constrArgs = {UimaContext.class, Properties.class};
+      final Class[] constrArgsConsum = {UimaContext.class, Properties.class, int.class};//ohnlp-Bugs-3296301
+      final Class[] constrArgsConsumB = {UimaContext.class, Properties.class};
+
+      final Set<LookupSpec> lsSet = new HashSet<LookupSpec>();
+      final List<Element> bindingChildren = lookupBindingsEl.getChildren();
+      try {
+         for ( Element bindingEl : bindingChildren ) {
+            final Element dictEl = bindingEl.getChild( "dictionaryRef" );
+            final String dictID = dictEl.getAttributeValue( "idRef" );
+            final DictionaryEngine dictEngine = dictMap.get( dictID );
+            if ( dictEngine == null ) {
+               throw new AnnotatorContextException( "Dictionary undefined: " + dictID, null );
+            }
+
+            final Element lookupInitEl = bindingEl.getChild( "lookupInitializer" );
+            final String liClassName = lookupInitEl.getAttributeValue( "className" );
+            final Element liPropertiesEl = lookupInitEl.getChild( "properties" );
+            final Properties liProps = parsePropertiesXml( liPropertiesEl );
+            final Class liClass = Class.forName( liClassName );
+            final Constructor liConstr = liClass.getConstructor( constrArgs );
+            final Object[] liArgs = {annotCtx, liProps};
+            final LookupInitializer li = (LookupInitializer) liConstr.newInstance( liArgs );
+
+            final Element lookupConsumerEl = bindingEl.getChild( "lookupConsumer" );
+            final String lcClassName = lookupConsumerEl.getAttributeValue( "className" );
+            final Element lcPropertiesEl = lookupConsumerEl.getChild( "properties" );
+            final Properties lcProps = parsePropertiesXml( lcPropertiesEl );
+            final Class lcClass = Class.forName( lcClassName );
+            final Constructor[] consts = lcClass.getConstructors();
+            Constructor lcConstr = null;
+            Object[] lcArgs = null;
+            // Only remember constructors with a supported signature, so that an unrelated constructor visited
+            // later cannot replace the selection.  When both signatures exist the one visited last is used.
+            for ( Constructor constConstr : consts ) {
+               final Class[] paramTypes = constConstr.getParameterTypes();
+               if ( Arrays.equals( constrArgsConsum, paramTypes ) ) {
+                  lcConstr = constConstr;
+                  lcArgs = new Object[]{annotCtx, lcProps, MAX_LIST_SIZE};//ohnlp-Bugs-3296301
+               } else if ( Arrays.equals( constrArgsConsumB, paramTypes ) ) {
+                  lcConstr = constConstr;
+                  lcArgs = new Object[]{annotCtx, lcProps};
+               }
             }
-            
-            File csvFile = ((FileResource) extResrc).getFile();
-            StringTable strTable = StringTableFactory.build(
-                    new FileReader(csvFile),
-                    fieldDelimiter,
-                    idxFieldNameArr,
-                    true);
-            dict = new StringTableDictionaryImpl(strTable, lookupFieldName);
-        }
-		else
-		{
-			throw new Exception("Unsupported impl type:" + implType);
-		}
-
-		Iterator metaFieldItr = rootDictEl.getChild("metaFields")
-				.getChildren()
-				.iterator();
-		while (metaFieldItr.hasNext())
-		{
-			Element metaFieldEl = (Element) metaFieldItr.next();
-			String metaFieldName = metaFieldEl.getAttributeValue("fieldName");
-			dict.retainMetaData(metaFieldName);
-		}
-
-		DictionaryEngine dictEngine = new DictionaryEngine(dict, keepCase.booleanValue()); 
-
-	    Element excludeList = rootDictEl.getChild("excludeList");
-	    
-	    if (excludeList != null && excludeList.getChildren() != null && excludeList.getChildren().size() > 0) {
-	    	addExcludeList(dictEngine, excludeList.getChildren().iterator());
-	    }
-
-		return dictEngine;
-	}
-
-	
-	/*
-	 * Word(s) not to look up
-	 * TODO Consider adding common words as possible performance improvement
-	 */
-	private static void addExcludeList(DictionaryEngine ge, Iterator itr) {
-
-		HashSet hs = new HashSet();
-	    
-		while(itr.hasNext()) {
-			Element item = (Element) itr.next();
-			String s = (String)item.getAttributeValue("value");
-			System.out.println("Adding exclude value["+s+"]"); // TODO - use logger      
-			hs.add(s);
-	    }
-	    
-	    StringPreLookupFilterImpl plf = new StringPreLookupFilterImpl(hs);
-	    ge.addPreLookupFilter(plf);
-	}
-
-	
-	private static Set parseLookupBindingXml(UimaContext annotCtx,
-			Map dictMap, Element lookupBindingsEl) throws Exception {
-
-		Set lsSet = new HashSet();
-		Iterator itr = lookupBindingsEl.getChildren().iterator();
-		while (itr.hasNext())
-		{
-			Element bindingEl = (Element) itr.next();
-
-			Element dictEl = bindingEl.getChild("dictionaryRef");
-			String dictID = dictEl.getAttributeValue("idRef");
-			DictionaryEngine dictEngine = (DictionaryEngine) dictMap.get(dictID);
-			if (dictEngine == null)
-			{
-				throw new Exception("Dictionary undefined: " + dictID);
-			}
-
-			Class[] constrArgs = { UimaContext.class, Properties.class };
-			Class[] constrArgsConsum = { UimaContext.class, Properties.class, int.class };//ohnlp-Bugs-3296301
-			Class[] constrArgsConsumB = { UimaContext.class, Properties.class };
-
-			Element lookupInitEl = bindingEl.getChild("lookupInitializer");
-			String liClassName = lookupInitEl.getAttributeValue("className");
-			Element liPropertiesEl = lookupInitEl.getChild("properties");
-			Properties liProps = parsePropertiesXml(liPropertiesEl);
-			Class liClass = Class.forName(liClassName);
-			Constructor liConstr = liClass.getConstructor(constrArgs);
-			Object[] liArgs = { annotCtx, liProps };
-			LookupInitializer li = (LookupInitializer) liConstr.newInstance(liArgs);
-
-			Element lookupConsumerEl = bindingEl.getChild("lookupConsumer");
-			String lcClassName = lookupConsumerEl.getAttributeValue("className");
-			Element lcPropertiesEl = lookupConsumerEl.getChild("properties");
-			Properties lcProps = parsePropertiesXml(lcPropertiesEl);
-			Class lcClass = Class.forName(lcClassName);
-			Constructor[] consts = lcClass.getConstructors();
-			Constructor lcConstr = null;
-			Object[] lcArgs = null;
-			for(int i=0;i<consts.length;i++)
-			{
-			lcConstr = consts[i];
-				if (Arrays.equals(constrArgsConsum,lcConstr.getParameterTypes()) )
-				{
-					lcConstr = lcClass.getConstructor(constrArgsConsum);
-					lcArgs = new Object[]{ annotCtx, lcProps, maxSizeList };//ohnlp-Bugs-3296301					
-				}
-				else if (Arrays.equals(constrArgsConsumB,lcConstr.getParameterTypes()) )
-				{
-					lcConstr = lcClass.getConstructor(constrArgsConsumB);
-					lcArgs = new Object[]{ annotCtx, lcProps };
-				}				
-			}
-
-			LookupConsumer lc = (LookupConsumer) lcConstr.newInstance(lcArgs);
-			LookupAlgorithm la = li.getLookupAlgorithm(dictEngine);
-
-			LookupSpec ls = new LookupSpec(la, li, lc);
-
-			lsSet.add(ls);
-		}
-		return lsSet;
-	}
-	/**
-	 * Get the maximum list size to be returned from a lucene index
-	 * @return maxSizeList
-	 */
-	public static int getMaxSizeList () {
-		return maxSizeList;
-	}
-	/**
-	 * Set the maximum list size to be returned from a lucene index
-	 * @return maxSizeList
-	 */
-	public static void setMaxSizeList (int maxListSize) {
-		maxSizeList = maxListSize;
-	}
-	
-	private static Properties parsePropertiesXml(Element propsEl)
-	{
-		Properties props = new Properties();
-		Iterator itr = propsEl.getChildren().iterator();
-		while (itr.hasNext())
-		{
-			Element propEl = (Element) itr.next();
-			String key = propEl.getAttributeValue("key");
-			String value = propEl.getAttributeValue("value");
-			props.put(key, value);
-		}
-		return props;
-	}
-	// Added 'maxListSize'.  Size equals max int by default 
-	private static int  maxSizeList = Integer.MAX_VALUE; //ohnlp-Bugs-3296301
 
-}
\ No newline at end of file
+            if ( lcConstr == null ) {
+               // no supported constructor; wrapped as AnnotatorContextException by the handler below
+               throw new NoSuchMethodException( lcClassName
+                     + " has no public constructor (UimaContext, Properties, int) or (UimaContext, Properties)" );
+            }
+            final LookupConsumer lc = (LookupConsumer) lcConstr.newInstance( lcArgs );
+            final LookupAlgorithm la = li.getLookupAlgorithm( dictEngine );
+
+            final LookupSpec ls = new LookupSpec( la, li, lc );
+
+            lsSet.add( ls );
+         }
+         // TODO refactor to catch ( ex1 | ex2 | ex3 ) when cTakes moves to java 7
+      } catch ( ClassNotFoundException cnfE ) {
+         // thrown by Class.forName(..)
+         throw new AnnotatorContextException( cnfE );
+      } catch ( NoSuchMethodException nsmE ) {
+         // thrown by Class.getConstructor(..) and when no supported consumer constructor exists
+         throw new AnnotatorContextException( nsmE );
+      } catch ( SecurityException secE ) {
+         // thrown by Class.getConstructor(..)
+         throw new AnnotatorContextException( secE );
+      } catch ( InstantiationException instE ) {
+         // thrown by Constructor.newInstance(..)
+         throw new AnnotatorContextException( instE );
+      } catch ( IllegalAccessException iaE ) {
+         // thrown by Constructor.newInstance(..)
+         throw new AnnotatorContextException( iaE );
+      } catch ( InvocationTargetException itE ) {
+         // thrown by Constructor.newInstance(..)
+         throw new AnnotatorContextException( itE );
+      } catch ( AnnotatorInitializationException aiE ) {
+         // thrown by LookupInitializer.getLookupAlgorithm(..)
+         throw new AnnotatorContextException( aiE );
+      } catch ( ClassCastException ccE ) {
+         // thrown everywhere in this method ...
+         throw new AnnotatorContextException( ccE );
+      } catch ( NullPointerException npE ) {
+         // thrown everywhere in this method ...
+         throw new AnnotatorContextException( npE );
+      }
+      return lsSet;
+   }
+
+//   /**
+//    * Get the maximum list size to be returned from a lucene index
+//    *
+//    * @return MAX_LIST_SIZE
+//    */
+//   public static int getMaxSizeList() {
+//      return MAX_LIST_SIZE;
+//   }
+//
+//   /**
+//    * Set the maximum list size to be returned from a lucene index
+//    *
+//    * @return MAX_LIST_SIZE
+//    */
+//   public static void setMaxSizeList( int maxListSize ) {
+//      MAX_LIST_SIZE = maxListSize;
+//   }
+
+   /**
+    * Collects the "key" / "value" attribute pairs of an element's children into a Properties object.
+    *
+    * @param propsEl element whose children each carry "key" and "value" attributes
+    * @return properties holding one entry per child element
+    */
+   private static Properties parsePropertiesXml( final Element propsEl ) {
+      final Properties parsed = new Properties();
+      final Iterator<Element> childItr = propsEl.getChildren().iterator();
+      while ( childItr.hasNext() ) {
+         final Element propertyEl = childItr.next();
+         parsed.put( propertyEl.getAttributeValue( "key" ), propertyEl.getAttributeValue( "value" ) );
+      }
+      return parsed;
+   }
+
+   // Added 'maxListSize'.  Size equals max int by default
+   private static int MAX_LIST_SIZE = Integer.MAX_VALUE; //ohnlp-Bugs-3296301
+
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java Mon Feb 25 22:50:42 2013
@@ -20,42 +20,41 @@ package org.apache.ctakes.dictionary.loo
 
 import org.apache.ctakes.dictionary.lookup.algorithms.LookupAlgorithm;
 
+import javax.annotation.concurrent.Immutable;
+
 /**
  * A container for three related classes used to lookup terms in a dictionary and process hits found.
  * <li>a lookup algorithm - a class with a <code>lookup</code> method that returns hits</li>
  * <li>a lookup initializer - a collection of methods used to initialize/control the lookup algorithm</li>
  * <li>a lookup consumer - class which processes hits found by the lookup algorithm,
  * and typically adds annotations to the CAS</li>
- * 
+ *
  * @author Mayo Clinic
  */
-public class LookupSpec
-{
-    private LookupAlgorithm iv_lookupAlgorithm;
-    private LookupInitializer iv_lookupInitializer;
-    private LookupConsumer iv_lookupConsumer;
-
-    public LookupSpec(LookupAlgorithm lookupAlgorithm,
-            LookupInitializer lookupInitializer, LookupConsumer lookupConsumer)
-    {
-        iv_lookupAlgorithm = lookupAlgorithm;
-        iv_lookupInitializer = lookupInitializer;
-        iv_lookupConsumer = lookupConsumer;
-    }
-
-    public LookupAlgorithm getLookupAlgorithm()
-    {
-        return iv_lookupAlgorithm;
-    }
-
-    public LookupInitializer getLookupInitializer()
-    {
-        return iv_lookupInitializer;
-    }
-
-    public LookupConsumer getLookupConsumer()
-    {
-        return iv_lookupConsumer;
-    }
+@Immutable
+public final class LookupSpec {
+   // the three collaborators are assigned once in the constructor and never replaced
+   private final LookupAlgorithm _algorithm;
+   private final LookupInitializer _initializer;
+   private final LookupConsumer _consumer;
+
+   /**
+    * @param lookupAlgorithm   algorithm held by this spec
+    * @param lookupInitializer initializer held by this spec
+    * @param lookupConsumer    consumer held by this spec
+    */
+   public LookupSpec( final LookupAlgorithm lookupAlgorithm,
+                      final LookupInitializer lookupInitializer,
+                      final LookupConsumer lookupConsumer ) {
+      _algorithm = lookupAlgorithm;
+      _initializer = lookupInitializer;
+      _consumer = lookupConsumer;
+   }
+
+   /**
+    * @return the lookup algorithm given at construction
+    */
+   public LookupAlgorithm getLookupAlgorithm() {
+      return _algorithm;
+   }
+
+   /**
+    * @return the lookup initializer given at construction
+    */
+   public LookupInitializer getLookupInitializer() {
+      return _initializer;
+   }
+
+   /**
+    * @return the lookup consumer given at construction
+    */
+   public LookupConsumer getLookupConsumer() {
+      return _consumer;
+   }
 
 }

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java Mon Feb 25 22:50:42 2013
@@ -1,171 +1,127 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.ctakes.dictionary.lookup.ae;
-
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Properties;
-import java.util.Set;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.dictionary.lookup.ae;
 
+import org.apache.ctakes.dictionary.lookup.MetaDataHit;
+import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
+import org.apache.ctakes.typesystem.type.constants.CONST;
+import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 
-
-import org.apache.ctakes.dictionary.lookup.MetaDataHit;
-import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
-import org.apache.ctakes.typesystem.type.refsem.OntologyConcept;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.EventMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
-import org.apache.ctakes.typesystem.type.constants.CONST;
-
-/**
- * @author Mayo Clinic
- */
-public class NamedEntityLookupConsumerImpl extends BaseLookupConsumerImpl
-		implements LookupConsumer
-{
-
-	private final String CODE_MF_PRP_KEY = "codeMetaField";
-
-	private final String CODING_SCHEME_PRP_KEY = "codingScheme";
-
-	private final String TYPE_ID_FIELD = "typeIdField";
-	
-	private Properties iv_props;
-	
-	private static int iv_maxSize;
-
-	public NamedEntityLookupConsumerImpl(UimaContext aCtx, Properties props, int maxListSize)
-	{
-		// TODO property validation could be done here
-		iv_props = props;
-		iv_maxSize = maxListSize;
-	}
-	public NamedEntityLookupConsumerImpl(UimaContext aCtx, Properties props)
-	{
-		// TODO property validation could be done here
-		iv_props = props;
-	}
-
-	private int countUniqueCodes(Collection hitsAtOffset) {
-		Iterator lhAtOffsetItr = hitsAtOffset.iterator();
-		Set<String> codes = new HashSet<String>();
-		while (lhAtOffsetItr.hasNext())
-		{
-			LookupHit lh = (LookupHit) lhAtOffsetItr.next();
-
-			MetaDataHit mdh = lh.getDictMetaDataHit();
-
-			String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY));
-			if (codes.contains(code)) {
-				// don't create a second entry in the array for a code already seen, including null 
-			} else {
-				
-				codes.add(code);
-
-			}
-		}
-		
-		return codes.size();
-	}
-
-	
-	public void consumeHits(JCas jcas, Iterator lhItr)
-			throws AnalysisEngineProcessException
-	{
-
-		String typeId = null;
-		Iterator hitsByOffsetItr = organizeByOffset(lhItr);
-		while (hitsByOffsetItr.hasNext())
-		{
-			Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
-
-			
-			FSArray ocArr = new FSArray(jcas, countUniqueCodes(hitsAtOffsetCol));
-			
-			int ocArrIdx = 0;
-
-			// iterate over the LookupHit objects and create
-			// a corresponding JCas OntologyConcept object that will
-			// be placed in a FSArray
-			Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
-			int neBegin = -1;
-			int neEnd = -1;
-			Set<String> codes = new HashSet<String>();
-			while (lhAtOffsetItr.hasNext())
-			{
-				LookupHit lh = (LookupHit) lhAtOffsetItr.next();
-				neBegin = lh.getStartOffset();
-				neEnd = lh.getEndOffset();
-
-				MetaDataHit mdh = lh.getDictMetaDataHit();
-
-				String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY));
-				if (codes.contains(code)) {
-					// don't create a second entry in the array for a code already seen, including null 
-				} else {
-					
-					OntologyConcept oc = new OntologyConcept(jcas);
-					oc.setCode(code);
-					oc.setCodingScheme(iv_props.getProperty(CODING_SCHEME_PRP_KEY));
-
-					if(iv_props.getProperty(TYPE_ID_FIELD) != null) {
-						typeId = iv_props.getProperty(TYPE_ID_FIELD);//mdh.getMetaFieldValue(iv_props.getProperty(TYPE_ID_FIELD));
-					}
-					
-					ocArr.set(ocArrIdx, oc);
-					ocArrIdx++;
-
-					codes.add(code);
-
-				}
-			}
-
-			int tid=CONST.NE_TYPE_ID_UNKNOWN;
-			if(typeId != null){
-				try { 
-					tid = Integer.parseInt(typeId);
-				} catch ( NumberFormatException nfe ) {
-					tid = CONST.NE_TYPE_ID_UNKNOWN;
-				}
-				
-			}
-
-			IdentifiedAnnotation neAnnot;
-			if (tid == CONST.NE_TYPE_ID_DRUG || tid == CONST.NE_TYPE_ID_UNKNOWN) {
-				neAnnot = new MedicationEventMention(jcas);	
-			} else {
-				neAnnot = new EntityMention(jcas);	
-			
-			}
-			
-			neAnnot.setBegin(neBegin);
-			neAnnot.setEnd(neEnd);
-			neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP);
-			neAnnot.setOntologyConceptArr(ocArr);
-			neAnnot.setTypeID(tid);
-			neAnnot.addToIndexes();			
-		}
-	}
-}
\ No newline at end of file
+import java.util.*;
+
+/**
+ * @author Mayo Clinic
+ */
+public class NamedEntityLookupConsumerImpl extends BaseLookupConsumerImpl implements LookupConsumer {
+
+   private static final String CODE_MF_PRP_KEY = "codeMetaField";
+
+   private static final String CODING_SCHEME_PRP_KEY = "codingScheme";
+
+   private static final String TYPE_ID_FIELD = "typeIdField";
+
+   private final  Properties _properties;
+
+   private static int iv_maxSize;
+
+   public NamedEntityLookupConsumerImpl( final UimaContext aCtx, final Properties props, final int maxListSize ) {
+      // TODO property validation could be done here
+      _properties = props;
+      iv_maxSize = maxListSize;
+   }
+
+   public NamedEntityLookupConsumerImpl( final UimaContext aCtx, final Properties props ) {
+      // TODO property validation could be done here
+      _properties = props;
+   }
+
+   private int countUniqueCodes( final Collection<LookupHit> hitsAtOffset ) {
+      final String CODE_MF = _properties.getProperty( CODE_MF_PRP_KEY );
+      final Set<String> codes = new HashSet<String>();
+      for ( LookupHit lookupHit : hitsAtOffset ) {
+         final MetaDataHit mdh = lookupHit.getDictMetaDataHit();
+         final String code = mdh.getMetaFieldValue( CODE_MF );
+         codes.add( code );
+      }
+      return codes.size();
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void consumeHits( final JCas jcas, final Iterator<LookupHit> lhItr ) throws AnalysisEngineProcessException {
+      final String TYPE_ID = _properties.getProperty( TYPE_ID_FIELD );
+      final String CODE_MF = _properties.getProperty( CODE_MF_PRP_KEY );
+      final String CODING_SCHEME = _properties.getProperty( CODING_SCHEME_PRP_KEY );
+      int typeId = CONST.NE_TYPE_ID_UNKNOWN;
+      if ( TYPE_ID != null ) {
+         try {
+            typeId = Integer.parseInt( TYPE_ID );
+         } catch ( NumberFormatException nfe ) {
+            typeId = CONST.NE_TYPE_ID_UNKNOWN;
+         }
+      }
+      final Map<LookupHitKey, Set<LookupHit>> lookupHitMap = createLookupHitMap( lhItr );
+      for ( Map.Entry<LookupHitKey, Set<LookupHit>> entry : lookupHitMap.entrySet() ) {
+         final int uniqueCodeCount = countUniqueCodes( entry.getValue() );
+         final FSArray ocArr = new FSArray( jcas, uniqueCodeCount );
+         // iterate over the LookupHit objects and create
+         // a corresponding JCas OntologyConcept object that will
+         // be placed in a FSArray
+         int ocArrIdx = 0;
+         final Set<String> codes = new HashSet<String>();
+         for ( LookupHit lookupHit : entry.getValue() ) {
+            final MetaDataHit mdh = lookupHit.getDictMetaDataHit();
+            final String code = mdh.getMetaFieldValue( CODE_MF );
+            if ( !codes.contains( code ) ) {
+               // create an array entry only the first time a given code is seen
+               final OntologyConcept oc = new OntologyConcept( jcas );
+               oc.setCode( code );
+               oc.setCodingScheme( CODING_SCHEME );
+               ocArr.set( ocArrIdx, oc );
+               ocArrIdx++;
+               codes.add( code );
+            }
+         }
+         IdentifiedAnnotation neAnnot;
+         if ( typeId == CONST.NE_TYPE_ID_DRUG || typeId == CONST.NE_TYPE_ID_UNKNOWN ) {
+            neAnnot = new MedicationEventMention( jcas );
+         } else {
+            neAnnot = new EntityMention( jcas );
+         }
+         final int neBegin = entry.getKey().__start;
+         final int neEnd = entry.getKey().__end;
+         neAnnot.setBegin( neBegin );
+         neAnnot.setEnd( neEnd );
+         neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
+         neAnnot.setOntologyConceptArr( ocArr );
+         neAnnot.setTypeID( typeId );
+         neAnnot.addToIndexes();
+      }
+   }
+
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java Mon Feb 25 22:50:42 2013
@@ -18,11 +18,6 @@
  */
 package org.apache.ctakes.dictionary.lookup.ae;
 
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Properties;
-
 import org.apache.ctakes.core.resource.LuceneIndexReaderResource;
 import org.apache.ctakes.dictionary.lookup.MetaDataHit;
 import org.apache.ctakes.dictionary.lookup.vo.LookupHit;
@@ -32,161 +27,122 @@ import org.apache.ctakes.typesystem.type
 import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention;
 import org.apache.log4j.Logger;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.*;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceAccessException;
+
+import java.io.IOException;
+import java.util.*;
 
 /**
  * Implementation that takes Rxnorm dictionary lookup hits and stores only the
  * ones that are also present in the Orange Book.
- * 
+ *
  * @author Mayo Clinic
  */
-public class OrangeBookFilterConsumerImpl extends BaseLookupConsumerImpl
-		implements LookupConsumer
-{
-	// LOG4J logger based on class name
-	private Logger iv_logger = Logger.getLogger(getClass().getName());
-
-	private final String CODE_MF_PRP_KEY = "codeMetaField";
-
-	private final String CODING_SCHEME_PRP_KEY = "codingScheme";
-
-	private final String LUCENE_FILTER_RESRC_KEY_PRP_KEY = "luceneFilterExtResrcKey";
-
-	private Properties iv_props;
-
-	private IndexSearcher iv_searcher;
-	//ohnlp-Bugs-3296301 limits the search results to fixed 100 records.
-	// Added 'MaxListSize'
-	private int iv_maxHits;
-
-	public OrangeBookFilterConsumerImpl(UimaContext aCtx, Properties props, int maxListSize)
-			throws Exception
-	{
-		// TODO property validation could be done here
-		iv_props = props;
-		iv_maxHits = maxListSize;
-		String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY);
-		LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName);
-		iv_searcher = new IndexSearcher(resrc.getIndexReader());
-	}
-	public OrangeBookFilterConsumerImpl(UimaContext aCtx, Properties props)
-	throws Exception
-	{
-		// TODO property validation could be done here
-		iv_props = props;
-		String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY);
-		LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName);
-		iv_searcher = new IndexSearcher(resrc.getIndexReader());
-		iv_maxHits = Integer.MAX_VALUE;
-	}
-	public void consumeHits(JCas jcas, Iterator lhItr)
-			throws AnalysisEngineProcessException
-	{
-		Iterator hitsByOffsetItr = organizeByOffset(lhItr);
-		while (hitsByOffsetItr.hasNext())
-		{
-			Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
-
-			// iterate over the LookupHit objects
-			// code is only valid if the covered text is also present in the
-			// filter
-			Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
-			int neBegin = -1;
-			int neEnd = -1;
-			Collection validCodeCol = new HashSet();
-			while (lhAtOffsetItr.hasNext())
-			{
-				LookupHit lh = (LookupHit) lhAtOffsetItr.next();
-				neBegin = lh.getStartOffset();
-				neEnd = lh.getEndOffset();
-
-				String text = jcas.getDocumentText().substring(
-						lh.getStartOffset(),
-						lh.getEndOffset());
-				text = text.trim().toLowerCase();
-
-				MetaDataHit mdh = lh.getDictMetaDataHit();
-				String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY));
-
-				if (isValid("trade_name", text) || isValid("ingredient", text))
-				{
-					validCodeCol.add(code);
-				}
-				else
-				{
-					iv_logger.warn("Filtered out: "+text);
-				}
-			}
-
-			if (validCodeCol.size() > 0)
-			{
-				FSArray ocArr = createOntologyConceptArr(jcas, validCodeCol);
-				IdentifiedAnnotation neAnnot = new MedicationEventMention(jcas); // medication NEs are EventMention
-				neAnnot.setTypeID(CONST.NE_TYPE_ID_DRUG);
-				neAnnot.setBegin(neBegin);
-				neAnnot.setEnd(neEnd);
-				neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP);
-				neAnnot.setOntologyConceptArr(ocArr);
-				neAnnot.addToIndexes();
-			}
-		}
-	}
-
-	/**
-	 * For each valid code, a corresponding JCas OntologyConcept object is
-	 * created and stored in a FSArray.
-	 * 
-	 * @param jcas
-	 * @param validCodeCol
-	 * @return
-	 */
-	private FSArray createOntologyConceptArr(JCas jcas, Collection validCodeCol)
-	{
-		FSArray ocArr = new FSArray(jcas, validCodeCol.size());
-		int ocArrIdx = 0;
-		Iterator validCodeItr = validCodeCol.iterator();
-		while (validCodeItr.hasNext())
-		{
-			String validCode = (String) validCodeItr.next();
-			OntologyConcept oc = new OntologyConcept(jcas);
-			oc.setCode(validCode);
-			oc.setCodingScheme(iv_props.getProperty(CODING_SCHEME_PRP_KEY));
-
-			ocArr.set(ocArrIdx, oc);
-			ocArrIdx++;
-		}
-		return ocArr;
-	}
-
-	private boolean isValid(String fieldName, String str)
-			throws AnalysisEngineProcessException
-	{
-		try
-		{
-			Query q = new TermQuery(new Term(fieldName, str));
-
-            TopDocs topDoc = iv_searcher.search(q, iv_maxHits);
-            ScoreDoc[] hits = topDoc.scoreDocs;
-            if ((hits != null) && (hits.length > 0))
-            {
-                return true;
-            }
-            else
-            {
-                return false;
+public class OrangeBookFilterConsumerImpl extends BaseLookupConsumerImpl implements LookupConsumer {
+   // LOG4J logger based on class name
+   private final Logger iv_logger = Logger.getLogger( getClass().getName() );
+
+   static private final String CODE_MF_PRP_KEY = "codeMetaField";
+
+   static private final String CODING_SCHEME_PRP_KEY = "codingScheme";
+
+   static private final String LUCENE_FILTER_RESRC_KEY_PRP_KEY = "luceneFilterExtResrcKey";
+
+   final private Properties _properties;
+
+   final private IndexSearcher _indexSearcher;
+   // ohnlp-Bugs-3296301: search results were limited to a fixed 100 records.
+   // Added 'maxListSize', which is passed to the index search as the maximum number of hits.
+   final private int _maxListSize;
+
+   public OrangeBookFilterConsumerImpl( final UimaContext aCtx, final Properties props, final int maxListSize )
+         throws ResourceAccessException, NullPointerException {
+      // TODO property validation could be done here
+      _properties = props;
+      _maxListSize = maxListSize;
+      final String resrcName = _properties.getProperty( LUCENE_FILTER_RESRC_KEY_PRP_KEY );
+      // UimaContext.getResourceObject(..) throws ResourceAccessException
+      final LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject( resrcName );
+      // Possible NullPointerException at resrc.getIndexReader(), e.g. if no resource was found for resrcName
+      _indexSearcher = new IndexSearcher( resrc.getIndexReader() );
+   }
+
+   public OrangeBookFilterConsumerImpl( final UimaContext aCtx, final Properties props )
+         throws Exception {
+      this( aCtx, props, Integer.MAX_VALUE );
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void consumeHits( final JCas jcas, final Iterator<LookupHit> lhItr ) throws AnalysisEngineProcessException {
+      final String CODE_MF = _properties.getProperty( CODE_MF_PRP_KEY );
+      final Map<LookupHitKey, Set<LookupHit>> lookupHitMap = createLookupHitMap( lhItr );
+      for ( Map.Entry<LookupHitKey, Set<LookupHit>> entry : lookupHitMap.entrySet() ) {
+         // iterate over the LookupHit objects
+         // code is only valid if the covered text is also present in the filter
+         final int neBegin = entry.getKey().__start;
+         final int neEnd = entry.getKey().__end;
+         final String text = jcas.getDocumentText().substring( neBegin, neEnd ).trim().toLowerCase();
+         final boolean isValid = isValid( "trade_name", text ) || isValid( "ingredient", text );
+         if ( isValid ) {
+            final Set<String> validCodes = new HashSet<String>();
+            for ( LookupHit lookupHit : entry.getValue() ) {
+               final MetaDataHit mdh = lookupHit.getDictMetaDataHit();
+               final String code = mdh.getMetaFieldValue( CODE_MF );
+               validCodes.add( code );
             }
-		}
-		catch (Exception e)
-		{
-			throw new AnalysisEngineProcessException(e);
-		}
-	}
-}
\ No newline at end of file
+            final FSArray ocArr = createOntologyConceptArr( jcas, validCodes );
+            IdentifiedAnnotation neAnnot = new MedicationEventMention( jcas ); // medication NEs are EventMention
+            neAnnot.setTypeID( CONST.NE_TYPE_ID_DRUG );
+            neAnnot.setBegin( neBegin );
+            neAnnot.setEnd( neEnd );
+            neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
+            neAnnot.setOntologyConceptArr( ocArr );
+            neAnnot.addToIndexes();
+         } else {
+            iv_logger.warn( "Filtered out: " + text );
+         }
+      }
+   }
+
+   /**
+    * For each valid code, a corresponding JCas OntologyConcept object is
+    * created and stored in a FSArray.
+    *
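+    * @param jcas       JCas in which the OntologyConcept objects and the FSArray are created
+    * @param validCodes codes to wrap; one OntologyConcept is created per code
+    * @return FSArray of size validCodes.size() holding the created OntologyConcept objects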
+    */
+   private FSArray createOntologyConceptArr( final JCas jcas, final Collection<String> validCodes ) {
+      final String CODING_SCHEME = _properties.getProperty( CODING_SCHEME_PRP_KEY );
+      final FSArray ocArr = new FSArray( jcas, validCodes.size() );
+      int ocArrIdx = 0;
+      for ( String validCode : validCodes ) {
+         final OntologyConcept oc = new OntologyConcept( jcas );
+         oc.setCode( validCode );
+         oc.setCodingScheme( CODING_SCHEME );
+         ocArr.set( ocArrIdx, oc );
+         ocArrIdx++;
+      }
+      return ocArr;
+   }
+
+   private boolean isValid( final String fieldName, final String text ) throws AnalysisEngineProcessException {
+      try {
+         final Query q = new TermQuery( new Term( fieldName, text ) );
+         final TopDocs topDoc = _indexSearcher.search( q, _maxListSize );
+         final ScoreDoc[] hits = topDoc.scoreDocs;
+         return hits != null && hits.length > 0;
+      } catch ( IOException ioE ) {
+         // thrown by IndexSearcher.search(..)
+         throw new AnalysisEngineProcessException( ioE );
+      }
+   }
+}

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java Mon Feb 25 22:50:42 2013
@@ -260,12 +260,14 @@ public class ThreadedDictionaryLookupAnn
    static private class LookupHitKey {
       final private int __start;
       final private int __end;
+      final private int __hashCode;
       private LookupHitKey( final LookupHit lookupHit ) {
          __start = lookupHit.getStartOffset();
          __end = lookupHit.getEndOffset();
+         __hashCode = 1000 * __end + __start;
       }
       public int hashCode() {
-         return 10000 *__start + __end;
+         return __hashCode;
       }
       public boolean equals( final Object object ) {
          return object instanceof LookupHitKey

Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java (original)
+++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java Mon Feb 25 22:50:42 2013
@@ -42,8 +42,7 @@ import java.util.*;
  *
  * @author Mayo Clinic
  */
-public abstract class UmlsToSnomedConsumerImpl extends BaseLookupConsumerImpl implements
-                                                                              LookupConsumer {
+public abstract class UmlsToSnomedConsumerImpl extends BaseLookupConsumerImpl implements LookupConsumer {
 
    static private final String CUI_MF_PRP_KEY = "cuiMetaField";
    static private final String TUI_MF_PRP_KEY = "tuiMetaField";
@@ -94,37 +93,125 @@ public abstract class UmlsToSnomedConsum
    protected abstract Set<String> getSnomedCodes( final String umlsCode ) throws SQLException, DictionaryException;
 
 
-   public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException {
+//   /**
+//    * {@inheritDoc}
+//    */
+//   @Override
+//   public void consumeHits( final JCas jcas, final Iterator<LookupHit> lhItr ) throws AnalysisEngineProcessException {
+//      try {
+//         final String cuiPropKey = props.getProperty( CUI_MF_PRP_KEY );
+//         final String tuiPropKey = props.getProperty( TUI_MF_PRP_KEY );
+//         final Iterator hitsByOffsetItr = organizeByOffset( lhItr );
+//         while ( hitsByOffsetItr.hasNext() ) {
+//            final Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
+//
+//            // Iterate over the LookupHit objects and group Snomed codes by NE type
+//            // For each NE type for which there is a hit, get all the Snomed codes
+//            // that map to the given CUI.
+//
+//            // Use key "cui,tui" to avoid duplicates at this offset
+//            final Set<String> cuiTuiSet = new HashSet<String>();
+//
+//            // key = type of named entity (java.lang.Integer)
+//            // val = set of UmlsConcept objects (java.util.Set)
+//            final Map<Integer,Set<UmlsConcept>> conceptMap = new HashMap<Integer,Set<UmlsConcept>>();
+//
+//            final Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
+//            int neBegin = -1;
+//            int neEnd = -1;
+//            while ( lhAtOffsetItr.hasNext() ) {
+//               final LookupHit lh = (LookupHit) lhAtOffsetItr.next();
+//               neBegin = lh.getStartOffset();
+//               neEnd = lh.getEndOffset();
+//
+//               final MetaDataHit mdh = lh.getDictMetaDataHit();
+//               final String cui = mdh.getMetaFieldValue( cuiPropKey );
+//               final String tui = mdh.getMetaFieldValue( tuiPropKey );
+//
+//               //String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
+//               if ( !_validTuiSet.contains( tui ) ) {
+//                  continue;
+//               }
+//               final String cuiTuiKey = getUniqueKey( cui, tui );
+//               if ( cuiTuiSet.contains( cuiTuiKey ) ) {
+//                  continue;
+//               }
+//               cuiTuiSet.add( cuiTuiKey );
+//               final Set<String> snomedCodeSet = getSnomedCodes( cui );
+//               if ( !snomedCodeSet.isEmpty() ) {
+//                  final Integer neType = getNamedEntityType( tui );
+//                  Set<UmlsConcept> conceptSet;
+//                  if ( conceptMap.containsKey( neType ) ) {
+//                     conceptSet = conceptMap.get( neType );
+//                  } else {
+//                     conceptSet = new HashSet<UmlsConcept>();
+//                  }
+//                  final Collection<UmlsConcept> conceptCol = createConceptCol( jcas, cui, tui, snomedCodeSet );
+//                  conceptSet.addAll( conceptCol );
+//                  conceptMap.put( neType, conceptSet );
+//               }
+//            }
+//
+//            final Collection<Integer> conceptKeys = conceptMap.keySet();
+//            for ( Integer conceptKey : conceptKeys ) {
+//               final Set<UmlsConcept> conceptSet = conceptMap.get( conceptKey );
+//
+//               // Skip updating CAS if all Concepts for this type were filtered out
+//               // for this span.
+//               if ( !conceptSet.isEmpty() ) {
+//                  FSArray conceptArr = new FSArray( jcas, conceptSet.size() );
+//                  int arrIdx = 0;
+//                  for ( UmlsConcept umlsConcept : conceptSet ) {
+//                     conceptArr.set( arrIdx, umlsConcept );
+//                     arrIdx++;
+//                  }
+//
+//                  IdentifiedAnnotation neAnnot;
+//                  if ( conceptKey == CONST.NE_TYPE_ID_DRUG ) {
+//                     neAnnot = new MedicationEventMention( jcas );
+//                  } else {
+//                     neAnnot = new EntityMention( jcas );
+//                  }
+//                  neAnnot.setTypeID( conceptKey );
+//                  neAnnot.setBegin( neBegin );
+//                  neAnnot.setEnd( neEnd );
+//                  neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP );
+//                  neAnnot.setOntologyConceptArr( conceptArr );
+//                  neAnnot.addToIndexes();
+//               }
+//            }
+//         }
+//      } catch ( Exception e ) {
+//         throw new AnalysisEngineProcessException( e );
+//      }
+//   }
+
+
+   /**
+    * {@inheritDoc}
+    */
+   @Override
+   public void consumeHits( final JCas jcas, final Iterator<LookupHit> lhItr ) throws AnalysisEngineProcessException {
       try {
          final String cuiPropKey = props.getProperty( CUI_MF_PRP_KEY );
          final String tuiPropKey = props.getProperty( TUI_MF_PRP_KEY );
-         final Iterator hitsByOffsetItr = organizeByOffset( lhItr );
-         while ( hitsByOffsetItr.hasNext() ) {
-            final Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next();
-
-            // Iterate over the LookupHit objects and group Snomed codes by NE type
-            // For each NE type for which there is a hit, get all the Snomed codes
-            // that map to the given CUI.
-
+         final Map<LookupHitKey, Set<LookupHit>> lookupHitMap = createLookupHitMap( lhItr );
+         // iterate over the LookupHit objects
+         for ( Map.Entry<LookupHitKey, Set<LookupHit>> entry : lookupHitMap.entrySet() ) {
+            // the entry key supplies the begin/end offsets for annotations built from this entry's hits
+            final int neBegin = entry.getKey().__start;
+            final int neEnd = entry.getKey().__end;
             // Use key "cui,tui" to avoid duplicates at this offset
             final Set<String> cuiTuiSet = new HashSet<String>();
-
-            // key = type of named entity (java.lang.Integer)
-            // val = set of UmlsConcept objects (java.util.Set)
+            // key = type of named entity, val = set of UmlsConcept objects
             final Map<Integer,Set<UmlsConcept>> conceptMap = new HashMap<Integer,Set<UmlsConcept>>();
-
-            final Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator();
-            int neBegin = -1;
-            int neEnd = -1;
-            while ( lhAtOffsetItr.hasNext() ) {
-               final LookupHit lh = (LookupHit) lhAtOffsetItr.next();
-               neBegin = lh.getStartOffset();
-               neEnd = lh.getEndOffset();
-
-               final MetaDataHit mdh = lh.getDictMetaDataHit();
+            // Iterate over the LookupHit objects and group Snomed codes by NE type
+            // For each NE type for which there is a hit, get all the Snomed codes
+            // that map to the given CUI.
+            for ( LookupHit lookupHit : entry.getValue() ) {
+               final MetaDataHit mdh = lookupHit.getDictMetaDataHit();
                final String cui = mdh.getMetaFieldValue( cuiPropKey );
                final String tui = mdh.getMetaFieldValue( tuiPropKey );
-
                //String text = lh.getDictMetaDataHit().getMetaFieldValue("text");
                if ( !_validTuiSet.contains( tui ) ) {
                   continue;
@@ -142,21 +229,18 @@ public abstract class UmlsToSnomedConsum
                      conceptSet = conceptMap.get( neType );
                   } else {
                      conceptSet = new HashSet<UmlsConcept>();
+                     conceptMap.put( neType, conceptSet );
                   }
                   final Collection<UmlsConcept> conceptCol = createConceptCol( jcas, cui, tui, snomedCodeSet );
                   conceptSet.addAll( conceptCol );
-                  conceptMap.put( neType, conceptSet );
                }
             }
 
-            final Collection<Integer> conceptKeys = conceptMap.keySet();
-            for ( Integer conceptKey : conceptKeys ) {
-               final Set<UmlsConcept> conceptSet = conceptMap.get( conceptKey );
-
-               // Skip updating CAS if all Concepts for this type were filtered out
-               // for this span.
+            for ( Map.Entry<Integer,Set<UmlsConcept>> conceptEntry : conceptMap.entrySet() ) {
+               final Set<UmlsConcept> conceptSet = conceptEntry.getValue();
+               // Skip updating CAS if all Concepts for this type were filtered out for this span.
                if ( !conceptSet.isEmpty() ) {
-                  FSArray conceptArr = new FSArray( jcas, conceptSet.size() );
+                  final FSArray conceptArr = new FSArray( jcas, conceptSet.size() );
                   int arrIdx = 0;
                   for ( UmlsConcept umlsConcept : conceptSet ) {
                      conceptArr.set( arrIdx, umlsConcept );
@@ -164,6 +248,7 @@ public abstract class UmlsToSnomedConsum
                   }
 
                   IdentifiedAnnotation neAnnot;
+                  final int conceptKey = conceptEntry.getKey();
                   if ( conceptKey == CONST.NE_TYPE_ID_DRUG ) {
                      neAnnot = new MedicationEventMention( jcas );
                   } else {
@@ -183,6 +268,7 @@ public abstract class UmlsToSnomedConsum
       }
    }
 
+
    private int getNamedEntityType( final String tui ) throws IllegalArgumentException {
       if ( _medicationSet.contains( tui ) ) {
          return CONST.NE_TYPE_ID_DRUG;



Mime
View raw message