Return-Path: X-Original-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Delivered-To: apmail-incubator-ctakes-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 3E023E5FE for ; Mon, 25 Feb 2013 22:51:17 +0000 (UTC) Received: (qmail 19496 invoked by uid 500); 25 Feb 2013 22:51:17 -0000 Delivered-To: apmail-incubator-ctakes-commits-archive@incubator.apache.org Received: (qmail 19468 invoked by uid 500); 25 Feb 2013 22:51:17 -0000 Mailing-List: contact ctakes-commits-help@incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: ctakes-dev@incubator.apache.org Delivered-To: mailing list ctakes-commits@incubator.apache.org Received: (qmail 19460 invoked by uid 99); 25 Feb 2013 22:51:17 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 25 Feb 2013 22:51:17 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 25 Feb 2013 22:51:06 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id A9FF02388847; Mon, 25 Feb 2013 22:50:44 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1449951 [1/2] - in /incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup: ./ ae/ algorithms/ filter/ lucene/ phrasebuilder/ strtable/ vo/ Date: Mon, 25 Feb 2013 22:50:43 -0000 To: ctakes-commits@incubator.apache.org From: seanfinan@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130225225044.A9FF02388847@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: seanfinan Date: Mon Feb 25 22:50:42 2013 New Revision: 1449951 URL: http://svn.apache.org/r1449951 Log: CTAKES-159 : Added some typing CTAKES-160 : Refined some Exception try/catch and throws CTAKES-161 : modernizing for jdk 1.5+ Some improvements to the dictionary-lookup led to refactoring Some faster consumer implementations (pulled redundancies out of inner iterations) Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java - copied, changed from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java - copied, changed from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedDbConsumerImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/algorithms/FirstTokenPermutationImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/filter/StringPreLookupFilterImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/lucene/LuceneDocumentMetaDataHitImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/phrasebuilder/VariantPhraseBuilderImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/strtable/StringTableRowMetaDataHitImpl.java incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/vo/LookupHit.java Copied: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java (from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java) URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java?p2=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java&p1=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java&r1=1449821&r2=1449951&rev=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/AbstractBaseMetaDataHit.java Mon Feb 25 22:50:42 2013 @@ -25,41 +25,20 @@ import java.util.Collection; * * @author Mayo Clinic */ -public abstract class BaseMetaDataHitImpl implements MetaDataHit -{ - /** - * Two MetaDataHits are equal if their Meta field name/value pairs - * are equal. - */ - // In that case, this code is broken. Note that this can contain all of those, but that may not contain all of these -// public boolean equals(MetaDataHit mdh) -// { -// // check names first -// if (getMetaFieldNames().containsAll(mdh.getMetaFieldNames())) -// { -// // check values -// if (getMetaFieldValues().containsAll(mdh.getMetaFieldValues())) -// { -// return true; -// } -// } -// -// return false; -// } +public abstract class AbstractBaseMetaDataHit implements MetaDataHit { + private int _hashCode = Integer.MIN_VALUE; /** * Two MetaDataHits are equal if their Meta field name/value pairs * are equal. */ public boolean equals( final MetaDataHit mdh ) { - // Still not great as two equal names could have swapped equal values, but fast if complete check isn't required if ( getMetaFieldNames().size() != mdh.getMetaFieldNames().size() || getMetaFieldValues().size() != mdh.getMetaFieldValues().size() - // TODO add types to MetaDataHit || !getMetaFieldNames().containsAll( mdh.getMetaFieldNames() ) ) { return false; } - final Collection thisMetaFieldNames = (Collection)getMetaFieldNames(); + final Collection thisMetaFieldNames = getMetaFieldNames(); for ( String name : thisMetaFieldNames ) { if ( !getMetaFieldValue( name ).equals( mdh.getMetaFieldValue( name ) ) ) { return false; @@ -68,12 +47,7 @@ public abstract class BaseMetaDataHitImp return true; } - // Added 12-17-2012 to increase duplicate filtering in DictionaryLookupAnnotator - // TODO As far as I have seen, instances of MetaDataHit are immutable (and should be so annotated) - // If MetaDataHit ever becomes mutable then the hashCode may need to be reset upon mutation - private int _hashCode = Integer.MIN_VALUE; - @Override public int hashCode() { if ( _hashCode == Integer.MIN_VALUE ) { Added: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java?rev=1449951&view=auto ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java (added) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/BaseMetaDataHitImpl.java Mon Feb 25 22:50:42 2013 @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.dictionary.lookup; + +import java.util.Collection; + +/** + * Base impl for a MetaDataHit implementation. + * + * @author Mayo Clinic + * @deprecated please use {@link AbstractBaseMetaDataHit} + */ +@Deprecated +// Renamed AbstractBaseDictionary as this is not a full implementation. - 2/25/2013 SPF +public abstract class BaseMetaDataHitImpl implements MetaDataHit { + /** + * Two MetaDataHits are equal if their Meta field name/value pairs + * are equal. + */ + // In that case, this code is broken. Note that this can contain all of those, but that may not contain all of these +// public boolean equals(MetaDataHit mdh) +// { +// // check names first +// if (getMetaFieldNames().containsAll(mdh.getMetaFieldNames())) +// { +// // check values +// if (getMetaFieldValues().containsAll(mdh.getMetaFieldValues())) +// { +// return true; +// } +// } +// +// return false; +// } + + /** + * Two MetaDataHits are equal if their Meta field name/value pairs + * are equal. + */ + public boolean equals( final MetaDataHit mdh ) { + // Still not great as two equal names could have swapped equal values, but fast if complete check isn't required + if ( getMetaFieldNames().size() != mdh.getMetaFieldNames().size() + || getMetaFieldValues().size() != mdh.getMetaFieldValues().size() + // TODO add types to MetaDataHit + || !getMetaFieldNames().containsAll( mdh.getMetaFieldNames() ) ) { + return false; + } + final Collection thisMetaFieldNames = (Collection)getMetaFieldNames(); + for ( String name : thisMetaFieldNames ) { + if ( !getMetaFieldValue( name ).equals( mdh.getMetaFieldValue( name ) ) ) { + return false; + } + } + return true; + } + + + // Added 12-17-2012 to increase duplicate filtering in DictionaryLookupAnnotator + // TODO As far as I have seen, instances of MetaDataHit are immutable (and should be so annotated) + // If MetaDataHit ever becomes mutable then the hashCode may need to be reset upon mutation + private int _hashCode = Integer.MIN_VALUE; + + @Override + public int hashCode() { + if ( _hashCode == Integer.MIN_VALUE ) { + _hashCode = 27 * getMetaFieldNames().hashCode() + getMetaFieldValues().hashCode(); + } + return _hashCode; + } + +} Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/GenericMetaDataHitImpl.java Mon Feb 25 22:50:42 2013 @@ -18,36 +18,44 @@ */ package org.apache.ctakes.dictionary.lookup; +import javax.annotation.concurrent.Immutable; import java.util.Collection; +import java.util.Collections; import java.util.Map; import java.util.Set; /** - * * @author Mayo Clinic */ -public class GenericMetaDataHitImpl extends BaseMetaDataHitImpl - implements MetaDataHit -{ - private Map iv_nameValueMap; - - public GenericMetaDataHitImpl(Map metaNameValueMap) - { - iv_nameValueMap = metaNameValueMap; - } - - public String getMetaFieldValue(String metaFieldName) - { - return (String) iv_nameValueMap.get(metaFieldName); - } - - public Set getMetaFieldNames() - { - return iv_nameValueMap.keySet(); - } - - public Collection getMetaFieldValues() - { - return iv_nameValueMap.values(); - } -} \ No newline at end of file +@Immutable +public final class GenericMetaDataHitImpl extends AbstractBaseMetaDataHit implements MetaDataHit { + private final Map _nameValueMap; + + public GenericMetaDataHitImpl( final Map metaNameValueMap ) { + _nameValueMap = Collections.unmodifiableMap( metaNameValueMap ); + } + + /** + * {@inheritDoc} + */ + @Override + public String getMetaFieldValue( final String metaFieldName ) { + return _nameValueMap.get( metaFieldName ); + } + + /** + * {@inheritDoc} + */ + @Override + public Set getMetaFieldNames() { + return _nameValueMap.keySet(); + } + + /** + * {@inheritDoc} + */ + @Override + public Collection getMetaFieldValues() { + return _nameValueMap.values(); + } +} Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/MetaDataHit.java Mon Feb 25 22:50:42 2013 @@ -27,11 +27,11 @@ import java.util.Set; */ public interface MetaDataHit { - public Set getMetaFieldNames(); + public Set getMetaFieldNames(); - public Collection getMetaFieldValues(); + public Collection getMetaFieldValues(); public String getMetaFieldValue(String metaFieldName); - public boolean equals(MetaDataHit mdh); + public boolean equals(MetaDataHit mdh); } Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/BaseLookupConsumerImpl.java Mon Feb 25 22:50:42 2013 @@ -18,68 +18,71 @@ */ package org.apache.ctakes.dictionary.lookup.ae; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Set; - import org.apache.ctakes.dictionary.lookup.vo.LookupHit; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.jcas.JCas; - +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.jcas.JCas; + +import java.util.*; + /** * Provides some base functionality for subclasses. - * + * * @author Mayo Clinic - * */ -public abstract class BaseLookupConsumerImpl implements LookupConsumer -{ - /** - * Organizes the LookupHit objects by begin and end offsets. - * - * @param lhItr - * @return Iterator over Set objects. Each Set object is a collection of - * LookupHit objects with the same begin,end offsets. - */ - protected Iterator organizeByOffset(Iterator lhItr) - { - // key = begin,end key (java.lang.String) - // val = Set of LookupHit objects corresponding to begin,end - Map m = new HashMap(); - - while (lhItr.hasNext()) - { - LookupHit lh = (LookupHit) lhItr.next(); - String keyStr = getKeyString(lh.getStartOffset(), lh.getEndOffset()); - - Set s = null; - if (m.containsKey(keyStr)) - { - s = (Set) m.get(keyStr); - } - else - { - s = new HashSet(); - } - s.add(lh); - m.put(keyStr, s); - } - - return m.values().iterator(); - } - - private String getKeyString(int begin, int end) - { - StringBuffer sb = new StringBuffer(); - sb.append(begin); - sb.append(','); - sb.append(end); - return sb.toString(); - } +// TODO rename this class properly: AbstractBaseLookupConsumer. Requires refactoring outside module +public abstract class BaseLookupConsumerImpl implements LookupConsumer { + /** + * Organizes the LookupHit objects by begin and end offsets. + * + * @param lookupHitIterator - + * @return Iterator over Set objects. Each Set object is a collection of + * LookupHit objects with the same begin,end offsets. + */ + static protected Iterator organizeByOffset( final Iterator lookupHitIterator ) { + final Map> lookupHitMap = createLookupHitMap( lookupHitIterator ); + return lookupHitMap.values().iterator(); + } + + static protected Map> createLookupHitMap( final Iterator lookupHitIterator ) { + final Map> lookupHitMap = new HashMap>(); + while ( lookupHitIterator.hasNext() ) { + final LookupHit lookupHit = lookupHitIterator.next(); + final LookupHitKey key = new LookupHitKey( lookupHit ); + Set lookupHits = lookupHitMap.get( key ); + if ( lookupHits == null ) { + lookupHits = new HashSet(); + lookupHitMap.put( key, lookupHits ); + } + lookupHits.add( lookupHit ); + } + return lookupHitMap; + } + + /** + * Using a String as a HashMap Key can be slow as + * the hashCode is computed per character with each call - ditto for equals + */ + static protected class LookupHitKey { + final protected int __start; + final protected int __end; + final private int __hashCode; + + private LookupHitKey( final LookupHit lookupHit ) { + __start = lookupHit.getStartOffset(); + __end = lookupHit.getEndOffset(); + __hashCode = 1000 * __end + __start; + } + + public int hashCode() { + return __hashCode; + } + + public boolean equals( final Object object ) { + return object instanceof LookupHitKey + && __start == ((LookupHitKey) object).__start + && __end == ((LookupHitKey) object).__end; + } + } - public abstract void consumeHits(JCas jcas, Iterator lookupHitItr) - throws AnalysisEngineProcessException; } Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupAnnotationToJCasAdapter.java Mon Feb 25 22:50:42 2013 @@ -18,56 +18,72 @@ */ package org.apache.ctakes.dictionary.lookup.ae; -import java.util.HashMap; -import java.util.Map; - import org.apache.ctakes.dictionary.lookup.vo.LookupAnnotation; import org.apache.ctakes.dictionary.lookup.vo.LookupToken; import org.apache.uima.jcas.tcas.Annotation; +import java.util.HashMap; +import java.util.Map; + /** * @author Mayo Clinic - * */ -public class LookupAnnotationToJCasAdapter implements LookupAnnotation, LookupToken -{ - private Map iv_attrMap = new HashMap(); - - private Annotation iv_jcasAnnotObj; - - public LookupAnnotationToJCasAdapter(Annotation jcasAnnotObj) - { - iv_jcasAnnotObj = jcasAnnotObj; - } - - public void addStringAttribute(String attrKey, String attrVal) - { - iv_attrMap.put(attrKey, attrVal); - } - - public int getEndOffset() - { - return iv_jcasAnnotObj.getEnd(); - } - - public int getLength() - { - return getStartOffset() - getEndOffset(); - } - - public int getStartOffset() - { - return iv_jcasAnnotObj.getBegin(); - } - - public String getStringAttribute(String attrKey) - { - return (String) iv_attrMap.get(attrKey); - } - - public String getText() - { - return iv_jcasAnnotObj.getCoveredText(); - } +public class LookupAnnotationToJCasAdapter implements LookupAnnotation, LookupToken { + + final private Map _attributeMap; + final private Annotation _jcasAnnotation; + + public LookupAnnotationToJCasAdapter( final Annotation jcasAnnotation ) { + _jcasAnnotation = jcasAnnotation; + _attributeMap = new HashMap(); + } + + /** + * {@inheritDoc} + */ + @Override + public void addStringAttribute( final String attrKey, final String attrVal ) { + _attributeMap.put( attrKey, attrVal ); + } + + /** + * {@inheritDoc} + */ + @Override + public int getEndOffset() { + return _jcasAnnotation.getEnd(); + } + + /** + * {@inheritDoc} + */ + @Override + public int getLength() { + return getStartOffset() - getEndOffset(); + } + + /** + * {@inheritDoc} + */ + @Override + public int getStartOffset() { + return _jcasAnnotation.getBegin(); + } + + /** + * {@inheritDoc} + */ + @Override + public String getStringAttribute( final String attrKey ) { + return _attributeMap.get( attrKey ); + } + + /** + * {@inheritDoc} + */ + @Override + public String getText() { + return _jcasAnnotation.getCoveredText(); + } } Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupConsumer.java Mon Feb 25 22:50:42 2013 @@ -20,8 +20,8 @@ package org.apache.ctakes.dictionary.loo import java.util.Iterator; -import org.apache.uima.analysis_engine.AnalysisEngineProcessException; -import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException; +import org.apache.ctakes.dictionary.lookup.vo.LookupHit; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; /** @@ -38,13 +38,12 @@ public interface LookupConsumer /** * Consumes the hits produced by the LookupAnnotator. This typically means * iterating over the hits and storing what's necessary to the JCas - * @param jcas - * CAS for storing data + * @param jcas CAS for storing data * @param lookupHitItr * Iterator over LookupHit objects. These objects contain data * about the annotation span plus any associated metadata. - * @throws AnnotatorProcessException + * @throws AnalysisEngineProcessException */ - public void consumeHits(JCas jcas, Iterator lookupHitItr) + public void consumeHits(JCas jcas, Iterator lookupHitItr) throws AnalysisEngineProcessException; } Copied: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java (from r1449821, incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java) URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java?p2=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java&p1=incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java&r1=1449821&r2=1449951&rev=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilities.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupParseUtilitiesRefactor.java Mon Feb 25 22:50:42 2013 @@ -18,20 +18,6 @@ */ package org.apache.ctakes.dictionary.lookup.ae; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.sql.Connection; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.StringTokenizer; - import org.apache.ctakes.core.resource.FileResource; import org.apache.ctakes.core.resource.JdbcConnectionResource; import org.apache.ctakes.core.resource.LuceneIndexReaderResource; @@ -44,269 +30,285 @@ import org.apache.ctakes.dictionary.look import org.apache.ctakes.dictionary.lookup.strtable.StringTable; import org.apache.ctakes.dictionary.lookup.strtable.StringTableDictionaryImpl; import org.apache.ctakes.dictionary.lookup.strtable.StringTableFactory; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.uima.UimaContext; -import org.apache.uima.analysis_engine.annotator.AnnotatorContextException; -import org.jdom.Document; -import org.jdom.Element; -import org.jdom.JDOMException; -import org.jdom.input.SAXBuilder; - +import org.apache.log4j.Logger; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.annotator.AnnotatorContextException; +import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException; +import org.apache.uima.resource.ResourceAccessException; +import org.jdom.Document; +import org.jdom.Element; +import org.jdom.JDOMException; +import org.jdom.input.SAXBuilder; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.sql.Connection; +import java.util.*; + + +// TODO Finish this refactor /** * @author Mayo Clinic */ -public class LookupParseUtilities -{ - //returns a set of LookupSpec objects - public static Set parseDescriptor(File descFile, UimaContext aContext, int maxListSize) - throws JDOMException, IOException, Exception - { - SAXBuilder saxBuilder = new SAXBuilder(); - Document doc = saxBuilder.build(descFile); - maxSizeList = maxListSize; //ohnlp-Bugs-3296301 fixes limit the search results to fixed 100 records. - Map dictMap = parseDictionaries(aContext, doc.getRootElement().getChild( - "dictionaries")); - //ohnlp-Bugs-3296301 - return parseLookupBindingXml(aContext, dictMap, doc.getRootElement().getChild("lookupBindings")); - } - - public static Set parseDescriptor(File descFile, UimaContext aContext) - throws JDOMException, IOException, Exception - { - SAXBuilder saxBuilder = new SAXBuilder(); - Document doc = saxBuilder.build(descFile); - Map dictMap = parseDictionaries(aContext, doc.getRootElement().getChild( - "dictionaries")); - //ohnlp-Bugs-3296301 - return parseLookupBindingXml(aContext, dictMap, doc.getRootElement().getChild("lookupBindings")); - } - private static Map parseDictionaries(UimaContext aContext, - Element dictetteersEl) throws AnnotatorContextException, Exception - { - Map m = new HashMap(); - Iterator dictItr = dictetteersEl.getChildren().iterator(); - while (dictItr.hasNext()) - { - Element dictEl = (Element) dictItr.next(); - String id = dictEl.getAttributeValue("id"); - DictionaryEngine dictEngine = LookupParseUtilities.parseDictionaryXml( - aContext, - dictEl); - m.put(id, dictEngine); - } - return m; - } - - private static DictionaryEngine parseDictionaryXml(UimaContext annotCtx, - Element rootDictEl) throws AnnotatorContextException, Exception - { - String extResrcKey = rootDictEl.getAttributeValue("externalResourceKey"); - Boolean keepCase = new Boolean(rootDictEl.getAttributeValue("caseSensitive")); - Object extResrc = annotCtx.getResourceObject(extResrcKey); - if (extResrc == null) - { - throw new Exception("Unable to find external resource with key:" - + extResrcKey); - } - - Element lookupFieldEl = rootDictEl.getChild("lookupField"); - String lookupFieldName = lookupFieldEl.getAttributeValue("fieldName"); - - Dictionary dict; - - Element implEl = (Element) rootDictEl.getChild("implementation") - .getChildren() - .get(0); - String implType = implEl.getName(); - if (implType.equals("luceneImpl")) - { - if (!(extResrc instanceof LuceneIndexReaderResource)) - { - throw new Exception("Expected external resource to be:" - + LuceneIndexReaderResource.class); - } - IndexReader indexReader = ((LuceneIndexReaderResource) extResrc).getIndexReader(); - IndexSearcher indexSearcher = new IndexSearcher(indexReader); - // Added 'MaxListSize' ohnlp-Bugs-3296301 - dict = new LuceneDictionaryImpl(indexSearcher, lookupFieldName, maxSizeList); - } - else if (implType.equals("jdbcImpl")) - { - String tableName = implEl.getAttributeValue("tableName"); - if (!(extResrc instanceof JdbcConnectionResource)) - { - throw new Exception("Expected external resource to be:" - + JdbcConnectionResource.class); - } - Connection conn = ((JdbcConnectionResource) extResrc).getConnection(); - dict = new JdbcDictionaryImpl(conn, tableName, lookupFieldName); - } - else if (implType.equals("csvImpl")) - { - String fieldDelimiter = implEl.getAttributeValue("delimiter"); - if (!(extResrc instanceof FileResource)) - { - throw new Exception("Expected external resource to be:" - + FileResource.class); +final public class LookupParseUtilitiesRefactor { + + static private final Logger CLASS_LOGGER = Logger.getLogger( LookupParseUtilitiesRefactor.class ); + + private LookupParseUtilitiesRefactor() {} + + //returns a set of LookupSpec objects + public static Set parseDescriptor( final File descFile, final UimaContext aContext, final int maxListSize ) + throws JDOMException, IOException, AnnotatorContextException, ResourceAccessException { + final SAXBuilder saxBuilder = new SAXBuilder(); + final Document doc = saxBuilder.build( descFile ); + MAX_LIST_SIZE = maxListSize; //ohnlp-Bugs-3296301 fixes limit the search results to fixed 100 records. + final Map dictMap = parseDictionaries( aContext, + doc.getRootElement().getChild( "dictionaries" ) ); + //ohnlp-Bugs-3296301 + return parseLookupBindingXml( aContext, dictMap, doc.getRootElement().getChild( "lookupBindings" ) ); + } + + public static Set parseDescriptor( final File descFile, final UimaContext aContext ) + throws JDOMException, IOException, AnnotatorContextException, ResourceAccessException { + return parseDescriptor( descFile, aContext, Integer.MAX_VALUE ); + } + + private static Map parseDictionaries( final UimaContext aContext, + final Element dictetteersEl ) + throws AnnotatorContextException, ResourceAccessException { + final Map m = new HashMap(); + final List dictatteerChildren = dictetteersEl.getChildren(); + for ( Element dictEl : dictatteerChildren ) { + final String id = dictEl.getAttributeValue( "id" ); + final DictionaryEngine dictEngine = LookupParseUtilitiesRefactor.parseDictionaryXml( aContext, dictEl ); + m.put( id, dictEngine ); + } + return m; + } + + private static DictionaryEngine parseDictionaryXml( final UimaContext annotCtx, final Element rootDictEl ) + throws AnnotatorContextException, ResourceAccessException { + final String extResrcKey = rootDictEl.getAttributeValue( "externalResourceKey" ); + // UimaContext.getResourceObject(..) throws ResourceAccessException + final Object extResrc = annotCtx.getResourceObject( extResrcKey ); + if ( extResrc == null ) { + throw new ResourceAccessException( "Unable to find external resource with key:" + extResrcKey, null ); + } + + final Element lookupFieldEl = rootDictEl.getChild( "lookupField" ); + final String lookupFieldName = lookupFieldEl.getAttributeValue( "fieldName" ); + + Dictionary dict; + try { + if (rootDictEl.getChild( "implementation" ).getChildren().isEmpty() ) { + throw new ResourceAccessException( new IndexOutOfBoundsException() ); + } + final Element implEl = (Element) rootDictEl.getChild( "implementation" ).getChildren().get( 0 ); + final String implType = implEl.getName(); + if ( implType.equals( "luceneImpl" ) ) { + if ( !(extResrc instanceof LuceneIndexReaderResource) ) { + throw new ResourceAccessException( "Expected external resource to be:" + + LuceneIndexReaderResource.class, new Object[]{extResrc} ); + } + final IndexReader indexReader = ((LuceneIndexReaderResource) extResrc).getIndexReader(); + final IndexSearcher indexSearcher = new IndexSearcher( indexReader ); + // Added 'MaxListSize' ohnlp-Bugs-3296301 + dict = new LuceneDictionaryImpl( indexSearcher, lookupFieldName, MAX_LIST_SIZE ); + } else if ( implType.equals( "jdbcImpl" ) ) { + final String tableName = implEl.getAttributeValue( "tableName" ); + if ( !(extResrc instanceof JdbcConnectionResource) ) { + throw new ResourceAccessException( "Expected external resource to be:" + + JdbcConnectionResource.class, new Object[]{extResrc} ); + } + final Connection conn = ((JdbcConnectionResource) extResrc).getConnection(); + dict = new JdbcDictionaryImpl( conn, tableName, lookupFieldName ); + } else if ( implType.equals( "csvImpl" ) ) { + final String fieldDelimiter = implEl.getAttributeValue( "delimiter" ); + if ( !(extResrc instanceof FileResource) ) { + throw new ResourceAccessException( "Expected external resource to be:" + + FileResource.class, new Object[]{extResrc} ); } - String idxFieldNameStr = implEl.getAttributeValue("indexedFieldNames"); - StringTokenizer st = new StringTokenizer(idxFieldNameStr, ","); + final String idxFieldNameStr = implEl.getAttributeValue( "indexedFieldNames" ); + final StringTokenizer st = new StringTokenizer( idxFieldNameStr, "," ); int arrIdx = 0; String[] idxFieldNameArr = new String[st.countTokens()]; - while (st.hasMoreTokens()) - { - idxFieldNameArr[arrIdx++] = st.nextToken().trim(); + while ( st.hasMoreTokens() ) { + idxFieldNameArr[arrIdx++] = st.nextToken().trim(); + } + + final File csvFile = ((FileResource) extResrc).getFile(); + try { + final StringTable strTable = StringTableFactory.build( new FileReader( csvFile ), + fieldDelimiter, idxFieldNameArr, true ); + dict = new StringTableDictionaryImpl( strTable, lookupFieldName ); + } catch ( FileNotFoundException fnfE ) { + throw new ResourceAccessException( "Could not open csv file", new Object[]{csvFile} ); + } catch (IOException ioE ) { + throw new ResourceAccessException( "Could not open csv file", new Object[]{csvFile} ); + } + } else { + throw new ResourceAccessException( "Unsupported impl type:" + implType, new Object[]{implType} ); + } + + final List rootDictChildren = rootDictEl.getChild( "metaFields" ).getChildren(); + for ( Element metaFieldEl : rootDictChildren ) { + final String metaFieldName = metaFieldEl.getAttributeValue( "fieldName" ); + dict.retainMetaData( metaFieldName ); + } + } catch ( NullPointerException npE ) { + // thrown all over this method ... + throw new ResourceAccessException( npE ); + } + final boolean keepCase = Boolean.parseBoolean( rootDictEl.getAttributeValue( "caseSensitive" ) ); + final DictionaryEngine dictEngine = new DictionaryEngine( dict, keepCase ); + final Element excludeList = rootDictEl.getChild( "excludeList" ); + if ( excludeList != null && excludeList.getChildren() != null && !excludeList.getChildren().isEmpty() ) { + addExcludeList( dictEngine, excludeList.getChildren() ); + } + return dictEngine; + } + + + /* + * Word(s) not to look up + * TODO Consider adding common words as possible performance improvement + */ + private static void addExcludeList( final DictionaryEngine dictionaryEngine, final List elementList ) { + final Set excludeValues = new HashSet( elementList.size() ); + for ( Element item : elementList ) { + final String excludeValue = item.getAttributeValue( "value" ); + CLASS_LOGGER.info( "Adding exclude value[" + excludeValue + "]" ); + excludeValues.add( excludeValue ); + } + final StringPreLookupFilterImpl filter = new StringPreLookupFilterImpl( excludeValues ); + dictionaryEngine.addPreLookupFilter( filter ); + } + + + private static Set parseLookupBindingXml( final UimaContext annotCtx, + final Map dictMap, + final Element lookupBindingsEl ) + throws AnnotatorContextException { + final Class[] constrArgs = {UimaContext.class, Properties.class}; + final Class[] constrArgsConsum = {UimaContext.class, Properties.class, int.class};//ohnlp-Bugs-3296301 + final Class[] constrArgsConsumB = {UimaContext.class, Properties.class}; + + final Set lsSet = new HashSet(); + final List bindingChildren = lookupBindingsEl.getChildren(); + try { + for ( Element bindingEl : bindingChildren ) { + final Element dictEl = bindingEl.getChild( "dictionaryRef" ); + final String dictID = dictEl.getAttributeValue( "idRef" ); + final DictionaryEngine dictEngine = dictMap.get( dictID ); + if ( dictEngine == null ) { + throw new AnnotatorContextException( "Dictionary undefined: " + dictID, null ); + } + + final Element lookupInitEl = bindingEl.getChild( "lookupInitializer" ); + final String liClassName = lookupInitEl.getAttributeValue( "className" ); + final Element liPropertiesEl = lookupInitEl.getChild( "properties" ); + final Properties liProps = parsePropertiesXml( liPropertiesEl ); + final Class liClass = Class.forName( liClassName ); + final Constructor liConstr = liClass.getConstructor( constrArgs ); + final Object[] liArgs = {annotCtx, liProps}; + final LookupInitializer li = (LookupInitializer) liConstr.newInstance( liArgs ); + + final Element lookupConsumerEl = bindingEl.getChild( "lookupConsumer" ); + final String lcClassName = lookupConsumerEl.getAttributeValue( "className" ); + final Element lcPropertiesEl = lookupConsumerEl.getChild( "properties" ); + final Properties lcProps = parsePropertiesXml( lcPropertiesEl ); + final Class lcClass = Class.forName( lcClassName ); + final Constructor[] consts = lcClass.getConstructors(); + Constructor lcConstr = null; + Object[] lcArgs = null; + for ( Constructor constConstr : consts ) { + lcConstr = constConstr; + if ( Arrays.equals( constrArgsConsum, lcConstr.getParameterTypes() ) ) { + lcConstr = lcClass.getConstructor( constrArgsConsum ); + lcArgs = new Object[]{annotCtx, lcProps, MAX_LIST_SIZE};//ohnlp-Bugs-3296301 + } else if ( Arrays.equals( constrArgsConsumB, lcConstr.getParameterTypes() ) ) { + lcConstr = lcClass.getConstructor( constrArgsConsumB ); + lcArgs = new Object[]{annotCtx, lcProps}; + } } - - File csvFile = ((FileResource) extResrc).getFile(); - StringTable strTable = StringTableFactory.build( - new FileReader(csvFile), - fieldDelimiter, - idxFieldNameArr, - true); - dict = new StringTableDictionaryImpl(strTable, lookupFieldName); - } - else - { - throw new Exception("Unsupported impl type:" + implType); - } - - Iterator metaFieldItr = rootDictEl.getChild("metaFields") - .getChildren() - .iterator(); - while (metaFieldItr.hasNext()) - { - Element metaFieldEl = (Element) metaFieldItr.next(); - String metaFieldName = metaFieldEl.getAttributeValue("fieldName"); - dict.retainMetaData(metaFieldName); - } - - DictionaryEngine dictEngine = new DictionaryEngine(dict, keepCase.booleanValue()); - - Element excludeList = rootDictEl.getChild("excludeList"); - - if (excludeList != null && excludeList.getChildren() != null && excludeList.getChildren().size() > 0) { - addExcludeList(dictEngine, excludeList.getChildren().iterator()); - } - - return dictEngine; - } - - - /* - * Word(s) not to look up - * TODO Consider adding common words as possible performance improvement - */ - private static void addExcludeList(DictionaryEngine ge, Iterator itr) { - - HashSet hs = new HashSet(); - - while(itr.hasNext()) { - Element item = (Element) itr.next(); - String s = (String)item.getAttributeValue("value"); - System.out.println("Adding exclude value["+s+"]"); // TODO - use logger - hs.add(s); - } - - StringPreLookupFilterImpl plf = new StringPreLookupFilterImpl(hs); - ge.addPreLookupFilter(plf); - } - - - private static Set parseLookupBindingXml(UimaContext annotCtx, - Map dictMap, Element lookupBindingsEl) throws Exception { - - Set lsSet = new HashSet(); - Iterator itr = lookupBindingsEl.getChildren().iterator(); - while (itr.hasNext()) - { - Element bindingEl = (Element) itr.next(); - - Element dictEl = bindingEl.getChild("dictionaryRef"); - String dictID = dictEl.getAttributeValue("idRef"); - DictionaryEngine dictEngine = (DictionaryEngine) dictMap.get(dictID); - if (dictEngine == null) - { - throw new Exception("Dictionary undefined: " + dictID); - } - - Class[] constrArgs = { UimaContext.class, Properties.class }; - Class[] constrArgsConsum = { UimaContext.class, Properties.class, int.class };//ohnlp-Bugs-3296301 - Class[] constrArgsConsumB = { UimaContext.class, Properties.class }; - - Element lookupInitEl = bindingEl.getChild("lookupInitializer"); - String liClassName = lookupInitEl.getAttributeValue("className"); - Element liPropertiesEl = lookupInitEl.getChild("properties"); - Properties liProps = parsePropertiesXml(liPropertiesEl); - Class liClass = Class.forName(liClassName); - Constructor liConstr = liClass.getConstructor(constrArgs); - Object[] liArgs = { annotCtx, liProps }; - LookupInitializer li = (LookupInitializer) liConstr.newInstance(liArgs); - - Element lookupConsumerEl = bindingEl.getChild("lookupConsumer"); - String lcClassName = lookupConsumerEl.getAttributeValue("className"); - Element lcPropertiesEl = lookupConsumerEl.getChild("properties"); - Properties lcProps = parsePropertiesXml(lcPropertiesEl); - Class lcClass = Class.forName(lcClassName); - Constructor[] consts = lcClass.getConstructors(); - Constructor lcConstr = null; - Object[] lcArgs = null; - for(int i=0;i propertyChildren = propsEl.getChildren(); + for ( Element propEl : propertyChildren ) { + final String key = propEl.getAttributeValue( "key" ); + final String value = propEl.getAttributeValue( "value" ); + props.put( key, value ); + } + return props; + } + + // Added 'maxListSize'. Size equals max int by default + private static int MAX_LIST_SIZE = Integer.MAX_VALUE; //ohnlp-Bugs-3296301 + +} Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupSpec.java Mon Feb 25 22:50:42 2013 @@ -20,42 +20,41 @@ package org.apache.ctakes.dictionary.loo import org.apache.ctakes.dictionary.lookup.algorithms.LookupAlgorithm; +import javax.annotation.concurrent.Immutable; + /** * A container for three related classes used to lookup terms in a dictionary and process hits found. *
  • a lookup algorithm - a class with a lookup method that returns hits
  • *
  • a lookup initializer - a collection of methods used to initialize/control the lookup algorithm
  • *
  • a lookup consumer - class which processes hits found by the lookup algorithm, * and typically adds annotations to the CAS
  • - * + * * @author Mayo Clinic */ -public class LookupSpec -{ - private LookupAlgorithm iv_lookupAlgorithm; - private LookupInitializer iv_lookupInitializer; - private LookupConsumer iv_lookupConsumer; - - public LookupSpec(LookupAlgorithm lookupAlgorithm, - LookupInitializer lookupInitializer, LookupConsumer lookupConsumer) - { - iv_lookupAlgorithm = lookupAlgorithm; - iv_lookupInitializer = lookupInitializer; - iv_lookupConsumer = lookupConsumer; - } - - public LookupAlgorithm getLookupAlgorithm() - { - return iv_lookupAlgorithm; - } - - public LookupInitializer getLookupInitializer() - { - return iv_lookupInitializer; - } - - public LookupConsumer getLookupConsumer() - { - return iv_lookupConsumer; - } +@Immutable +final public class LookupSpec { + final private LookupAlgorithm _lookupAlgorithm; + final private LookupInitializer _lookupInitializer; + final private LookupConsumer _lookupConsumer; + + public LookupSpec( final LookupAlgorithm lookupAlgorithm, + final LookupInitializer lookupInitializer, + final LookupConsumer lookupConsumer ) { + _lookupAlgorithm = lookupAlgorithm; + _lookupInitializer = lookupInitializer; + _lookupConsumer = lookupConsumer; + } + + public LookupAlgorithm getLookupAlgorithm() { + return _lookupAlgorithm; + } + + public LookupInitializer getLookupInitializer() { + return _lookupInitializer; + } + + public LookupConsumer getLookupConsumer() { + return _lookupConsumer; + } } Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/NamedEntityLookupConsumerImpl.java Mon Feb 25 22:50:42 2013 @@ -1,171 +1,127 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.ctakes.dictionary.lookup.ae; - -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Properties; -import java.util.Set; +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.dictionary.lookup.ae; +import org.apache.ctakes.dictionary.lookup.MetaDataHit; +import org.apache.ctakes.dictionary.lookup.vo.LookupHit; +import org.apache.ctakes.typesystem.type.constants.CONST; +import org.apache.ctakes.typesystem.type.refsem.OntologyConcept; +import org.apache.ctakes.typesystem.type.textsem.EntityMention; +import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; +import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.FSArray; - -import org.apache.ctakes.dictionary.lookup.MetaDataHit; -import org.apache.ctakes.dictionary.lookup.vo.LookupHit; -import org.apache.ctakes.typesystem.type.refsem.OntologyConcept; -import org.apache.ctakes.typesystem.type.textsem.EntityMention; -import org.apache.ctakes.typesystem.type.textsem.EventMention; -import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; -import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention; -import org.apache.ctakes.typesystem.type.constants.CONST; - -/** - * @author Mayo Clinic - */ -public class NamedEntityLookupConsumerImpl extends BaseLookupConsumerImpl - implements LookupConsumer -{ - - private final String CODE_MF_PRP_KEY = "codeMetaField"; - - private final String CODING_SCHEME_PRP_KEY = "codingScheme"; - - private final String TYPE_ID_FIELD = "typeIdField"; - - private Properties iv_props; - - private static int iv_maxSize; - - public NamedEntityLookupConsumerImpl(UimaContext aCtx, Properties props, int maxListSize) - { - // TODO property validation could be done here - iv_props = props; - iv_maxSize = maxListSize; - } - public NamedEntityLookupConsumerImpl(UimaContext aCtx, Properties props) - { - // TODO property validation could be done here - iv_props = props; - } - - private int countUniqueCodes(Collection hitsAtOffset) { - Iterator lhAtOffsetItr = hitsAtOffset.iterator(); - Set codes = new HashSet(); - while (lhAtOffsetItr.hasNext()) - { - LookupHit lh = (LookupHit) lhAtOffsetItr.next(); - - MetaDataHit mdh = lh.getDictMetaDataHit(); - - String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY)); - if (codes.contains(code)) { - // don't create a second entry in the array for a code already seen, including null - } else { - - codes.add(code); - - } - } - - return codes.size(); - } - - - public void consumeHits(JCas jcas, Iterator lhItr) - throws AnalysisEngineProcessException - { - - String typeId = null; - Iterator hitsByOffsetItr = organizeByOffset(lhItr); - while (hitsByOffsetItr.hasNext()) - { - Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next(); - - - FSArray ocArr = new FSArray(jcas, countUniqueCodes(hitsAtOffsetCol)); - - int ocArrIdx = 0; - - // iterate over the LookupHit objects and create - // a corresponding JCas OntologyConcept object that will - // be placed in a FSArray - Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator(); - int neBegin = -1; - int neEnd = -1; - Set codes = new HashSet(); - while (lhAtOffsetItr.hasNext()) - { - LookupHit lh = (LookupHit) lhAtOffsetItr.next(); - neBegin = lh.getStartOffset(); - neEnd = lh.getEndOffset(); - - MetaDataHit mdh = lh.getDictMetaDataHit(); - - String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY)); - if (codes.contains(code)) { - // don't create a second entry in the array for a code already seen, including null - } else { - - OntologyConcept oc = new OntologyConcept(jcas); - oc.setCode(code); - oc.setCodingScheme(iv_props.getProperty(CODING_SCHEME_PRP_KEY)); - - if(iv_props.getProperty(TYPE_ID_FIELD) != null) { - typeId = iv_props.getProperty(TYPE_ID_FIELD);//mdh.getMetaFieldValue(iv_props.getProperty(TYPE_ID_FIELD)); - } - - ocArr.set(ocArrIdx, oc); - ocArrIdx++; - - codes.add(code); - - } - } - - int tid=CONST.NE_TYPE_ID_UNKNOWN; - if(typeId != null){ - try { - tid = Integer.parseInt(typeId); - } catch ( NumberFormatException nfe ) { - tid = CONST.NE_TYPE_ID_UNKNOWN; - } - - } - - IdentifiedAnnotation neAnnot; - if (tid == CONST.NE_TYPE_ID_DRUG || tid == CONST.NE_TYPE_ID_UNKNOWN) { - neAnnot = new MedicationEventMention(jcas); - } else { - neAnnot = new EntityMention(jcas); - - } - - neAnnot.setBegin(neBegin); - neAnnot.setEnd(neEnd); - neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP); - neAnnot.setOntologyConceptArr(ocArr); - neAnnot.setTypeID(tid); - neAnnot.addToIndexes(); - } - } -} \ No newline at end of file +import java.util.*; + +/** + * @author Mayo Clinic + */ +public class NamedEntityLookupConsumerImpl extends BaseLookupConsumerImpl implements LookupConsumer { + + private static final String CODE_MF_PRP_KEY = "codeMetaField"; + + private static final String CODING_SCHEME_PRP_KEY = "codingScheme"; + + private static final String TYPE_ID_FIELD = "typeIdField"; + + private final Properties _properties; + + private static int iv_maxSize; + + public NamedEntityLookupConsumerImpl( final UimaContext aCtx, final Properties props, final int maxListSize ) { + // TODO property validation could be done here + _properties = props; + iv_maxSize = maxListSize; + } + + public NamedEntityLookupConsumerImpl( final UimaContext aCtx, final Properties props ) { + // TODO property validation could be done here + _properties = props; + } + + private int countUniqueCodes( final Collection hitsAtOffset ) { + final String CODE_MF = _properties.getProperty( CODE_MF_PRP_KEY ); + final Set codes = new HashSet(); + for ( LookupHit lookupHit : hitsAtOffset ) { + final MetaDataHit mdh = lookupHit.getDictMetaDataHit(); + final String code = mdh.getMetaFieldValue( CODE_MF ); + codes.add( code ); + } + return codes.size(); + } + + /** + * {@inheritDoc} + */ + @Override + public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException { + final String TYPE_ID = _properties.getProperty( TYPE_ID_FIELD ); + final String CODE_MF = _properties.getProperty( CODE_MF_PRP_KEY ); + final String CODING_SCHEME = _properties.getProperty( CODING_SCHEME_PRP_KEY ); + int typeId = CONST.NE_TYPE_ID_UNKNOWN; + if ( TYPE_ID != null ) { + try { + typeId = Integer.parseInt( TYPE_ID ); + } catch ( NumberFormatException nfe ) { + typeId = CONST.NE_TYPE_ID_UNKNOWN; + } + } + final Map> lookupHitMap = createLookupHitMap( lhItr ); + for ( Map.Entry> entry : lookupHitMap.entrySet() ) { + final int uniqueCodeCount = countUniqueCodes( entry.getValue() ); + final FSArray ocArr = new FSArray( jcas, uniqueCodeCount ); + // iterate over the LookupHit objects and create + // a corresponding JCas OntologyConcept object that will + // be placed in a FSArray + int ocArrIdx = 0; + final Set codes = new HashSet(); + for ( LookupHit lookupHit : entry.getValue() ) { + final MetaDataHit mdh = lookupHit.getDictMetaDataHit(); + final String code = mdh.getMetaFieldValue( CODE_MF ); + if ( !codes.contains( code ) ) { + // create only first entry in the array for a code + final OntologyConcept oc = new OntologyConcept( jcas ); + oc.setCode( code ); + oc.setCodingScheme( CODING_SCHEME ); + ocArr.set( ocArrIdx, oc ); + ocArrIdx++; + codes.add( code ); + } + } + IdentifiedAnnotation neAnnot; + if ( typeId == CONST.NE_TYPE_ID_DRUG || typeId == CONST.NE_TYPE_ID_UNKNOWN ) { + neAnnot = new MedicationEventMention( jcas ); + } else { + neAnnot = new EntityMention( jcas ); + } + final int neBegin = entry.getKey().__start; + final int neEnd = entry.getKey().__end; + neAnnot.setBegin( neBegin ); + neAnnot.setEnd( neEnd ); + neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP ); + neAnnot.setOntologyConceptArr( ocArr ); + neAnnot.setTypeID( typeId ); + neAnnot.addToIndexes(); + } + } + +} Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/OrangeBookFilterConsumerImpl.java Mon Feb 25 22:50:42 2013 @@ -18,11 +18,6 @@ */ package org.apache.ctakes.dictionary.lookup.ae; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Properties; - import org.apache.ctakes.core.resource.LuceneIndexReaderResource; import org.apache.ctakes.dictionary.lookup.MetaDataHit; import org.apache.ctakes.dictionary.lookup.vo.LookupHit; @@ -32,161 +27,122 @@ import org.apache.ctakes.typesystem.type import org.apache.ctakes.typesystem.type.textsem.MedicationEventMention; import org.apache.log4j.Logger; import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.*; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.cas.FSArray; +import org.apache.uima.resource.ResourceAccessException; + +import java.io.IOException; +import java.util.*; /** * Implementation that takes Rxnorm dictionary lookup hits and stores only the * ones that are also present in the Orange Book. - * + * * @author Mayo Clinic */ -public class OrangeBookFilterConsumerImpl extends BaseLookupConsumerImpl - implements LookupConsumer -{ - // LOG4J logger based on class name - private Logger iv_logger = Logger.getLogger(getClass().getName()); - - private final String CODE_MF_PRP_KEY = "codeMetaField"; - - private final String CODING_SCHEME_PRP_KEY = "codingScheme"; - - private final String LUCENE_FILTER_RESRC_KEY_PRP_KEY = "luceneFilterExtResrcKey"; - - private Properties iv_props; - - private IndexSearcher iv_searcher; - //ohnlp-Bugs-3296301 limits the search results to fixed 100 records. - // Added 'MaxListSize' - private int iv_maxHits; - - public OrangeBookFilterConsumerImpl(UimaContext aCtx, Properties props, int maxListSize) - throws Exception - { - // TODO property validation could be done here - iv_props = props; - iv_maxHits = maxListSize; - String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY); - LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName); - iv_searcher = new IndexSearcher(resrc.getIndexReader()); - } - public OrangeBookFilterConsumerImpl(UimaContext aCtx, Properties props) - throws Exception - { - // TODO property validation could be done here - iv_props = props; - String resrcName = iv_props.getProperty(LUCENE_FILTER_RESRC_KEY_PRP_KEY); - LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject(resrcName); - iv_searcher = new IndexSearcher(resrc.getIndexReader()); - iv_maxHits = Integer.MAX_VALUE; - } - public void consumeHits(JCas jcas, Iterator lhItr) - throws AnalysisEngineProcessException - { - Iterator hitsByOffsetItr = organizeByOffset(lhItr); - while (hitsByOffsetItr.hasNext()) - { - Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next(); - - // iterate over the LookupHit objects - // code is only valid if the covered text is also present in the - // filter - Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator(); - int neBegin = -1; - int neEnd = -1; - Collection validCodeCol = new HashSet(); - while (lhAtOffsetItr.hasNext()) - { - LookupHit lh = (LookupHit) lhAtOffsetItr.next(); - neBegin = lh.getStartOffset(); - neEnd = lh.getEndOffset(); - - String text = jcas.getDocumentText().substring( - lh.getStartOffset(), - lh.getEndOffset()); - text = text.trim().toLowerCase(); - - MetaDataHit mdh = lh.getDictMetaDataHit(); - String code = mdh.getMetaFieldValue(iv_props.getProperty(CODE_MF_PRP_KEY)); - - if (isValid("trade_name", text) || isValid("ingredient", text)) - { - validCodeCol.add(code); - } - else - { - iv_logger.warn("Filtered out: "+text); - } - } - - if (validCodeCol.size() > 0) - { - FSArray ocArr = createOntologyConceptArr(jcas, validCodeCol); - IdentifiedAnnotation neAnnot = new MedicationEventMention(jcas); // medication NEs are EventMention - neAnnot.setTypeID(CONST.NE_TYPE_ID_DRUG); - neAnnot.setBegin(neBegin); - neAnnot.setEnd(neEnd); - neAnnot.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_DICT_LOOKUP); - neAnnot.setOntologyConceptArr(ocArr); - neAnnot.addToIndexes(); - } - } - } - - /** - * For each valid code, a corresponding JCas OntologyConcept object is - * created and stored in a FSArray. - * - * @param jcas - * @param validCodeCol - * @return - */ - private FSArray createOntologyConceptArr(JCas jcas, Collection validCodeCol) - { - FSArray ocArr = new FSArray(jcas, validCodeCol.size()); - int ocArrIdx = 0; - Iterator validCodeItr = validCodeCol.iterator(); - while (validCodeItr.hasNext()) - { - String validCode = (String) validCodeItr.next(); - OntologyConcept oc = new OntologyConcept(jcas); - oc.setCode(validCode); - oc.setCodingScheme(iv_props.getProperty(CODING_SCHEME_PRP_KEY)); - - ocArr.set(ocArrIdx, oc); - ocArrIdx++; - } - return ocArr; - } - - private boolean isValid(String fieldName, String str) - throws AnalysisEngineProcessException - { - try - { - Query q = new TermQuery(new Term(fieldName, str)); - - TopDocs topDoc = iv_searcher.search(q, iv_maxHits); - ScoreDoc[] hits = topDoc.scoreDocs; - if ((hits != null) && (hits.length > 0)) - { - return true; - } - else - { - return false; +public class OrangeBookFilterConsumerImpl extends BaseLookupConsumerImpl implements LookupConsumer { + // LOG4J logger based on class name + private final Logger iv_logger = Logger.getLogger( getClass().getName() ); + + static private final String CODE_MF_PRP_KEY = "codeMetaField"; + + static private final String CODING_SCHEME_PRP_KEY = "codingScheme"; + + static private final String LUCENE_FILTER_RESRC_KEY_PRP_KEY = "luceneFilterExtResrcKey"; + + final private Properties _properties; + + final private IndexSearcher _indexSearcher; + //ohnlp-Bugs-3296301 limits the search results to fixed 100 records. + // Added 'MaxListSize' + final private int _maxListSize; + + public OrangeBookFilterConsumerImpl( final UimaContext aCtx, final Properties props, final int maxListSize ) + throws ResourceAccessException, NullPointerException { + // TODO property validation could be done here + _properties = props; + _maxListSize = maxListSize; + final String resrcName = _properties.getProperty( LUCENE_FILTER_RESRC_KEY_PRP_KEY ); + // UimaContext.getResourceObject(..) throws ResourceAccessException + final LuceneIndexReaderResource resrc = (LuceneIndexReaderResource) aCtx.getResourceObject( resrcName ); + // Possible npE with resrc.getIndexReader() + _indexSearcher = new IndexSearcher( resrc.getIndexReader() ); + } + + public OrangeBookFilterConsumerImpl( final UimaContext aCtx, final Properties props ) + throws Exception { + this( aCtx, props, Integer.MAX_VALUE ); + } + + /** + * {@inheritDoc} + */ + @Override + public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException { + final String CODE_MF = _properties.getProperty( CODE_MF_PRP_KEY ); + final Map> lookupHitMap = createLookupHitMap( lhItr ); + for ( Map.Entry> entry : lookupHitMap.entrySet() ) { + // iterate over the LookupHit objects + // code is only valid if the covered text is also present in the filter + final int neBegin = entry.getKey().__start; + final int neEnd = entry.getKey().__end; + final String text = jcas.getDocumentText().substring( neBegin, neEnd ).trim().toLowerCase(); + final boolean isValid = isValid( "trade_name", text ) || isValid( "ingredient", text ); + if ( isValid ) { + final Set validCodes = new HashSet(); + for ( LookupHit lookupHit : entry.getValue() ) { + final MetaDataHit mdh = lookupHit.getDictMetaDataHit(); + final String code = mdh.getMetaFieldValue( CODE_MF ); + validCodes.add( code ); } - } - catch (Exception e) - { - throw new AnalysisEngineProcessException(e); - } - } -} \ No newline at end of file + final FSArray ocArr = createOntologyConceptArr( jcas, validCodes ); + IdentifiedAnnotation neAnnot = new MedicationEventMention( jcas ); // medication NEs are EventMention + neAnnot.setTypeID( CONST.NE_TYPE_ID_DRUG ); + neAnnot.setBegin( neBegin ); + neAnnot.setEnd( neEnd ); + neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP ); + neAnnot.setOntologyConceptArr( ocArr ); + neAnnot.addToIndexes(); + } else { + iv_logger.warn( "Filtered out: " + text ); + } + } + } + + /** + * For each valid code, a corresponding JCas OntologyConcept object is + * created and stored in a FSArray. + * + * @param jcas - + * @param validCodes - + * @return - + */ + private FSArray createOntologyConceptArr( final JCas jcas, final Collection validCodes ) { + final String CODING_SCHEME = _properties.getProperty( CODING_SCHEME_PRP_KEY ); + final FSArray ocArr = new FSArray( jcas, validCodes.size() ); + int ocArrIdx = 0; + for ( String validCode : validCodes ) { + final OntologyConcept oc = new OntologyConcept( jcas ); + oc.setCode( validCode ); + oc.setCodingScheme( CODING_SCHEME ); + ocArr.set( ocArrIdx, oc ); + ocArrIdx++; + } + return ocArr; + } + + private boolean isValid( final String fieldName, final String text ) throws AnalysisEngineProcessException { + try { + final Query q = new TermQuery( new Term( fieldName, text ) ); + final TopDocs topDoc = _indexSearcher.search( q, _maxListSize ); + final ScoreDoc[] hits = topDoc.scoreDocs; + return hits != null && hits.length > 0; + } catch ( IOException ioE ) { + // thrown by IndexSearcher.search(..) + throw new AnalysisEngineProcessException( ioE ); + } + } +} Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/ThreadedDictionaryLookupAnnotator.java Mon Feb 25 22:50:42 2013 @@ -260,12 +260,14 @@ public class ThreadedDictionaryLookupAnn static private class LookupHitKey { final private int __start; final private int __end; + final private int __hashCode; private LookupHitKey( final LookupHit lookupHit ) { __start = lookupHit.getStartOffset(); __end = lookupHit.getEndOffset(); + __hashCode = 1000 * __end + __start; } public int hashCode() { - return 10000 *__start + __end; + return __hashCode; } public boolean equals( final Object object ) { return object instanceof LookupHitKey Modified: incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java?rev=1449951&r1=1449950&r2=1449951&view=diff ============================================================================== --- incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java (original) +++ incubator/ctakes/trunk/ctakes-dictionary-lookup/src/main/java/org/apache/ctakes/dictionary/lookup/ae/UmlsToSnomedConsumerImpl.java Mon Feb 25 22:50:42 2013 @@ -42,8 +42,7 @@ import java.util.*; * * @author Mayo Clinic */ -public abstract class UmlsToSnomedConsumerImpl extends BaseLookupConsumerImpl implements - LookupConsumer { +public abstract class UmlsToSnomedConsumerImpl extends BaseLookupConsumerImpl implements LookupConsumer { static private final String CUI_MF_PRP_KEY = "cuiMetaField"; static private final String TUI_MF_PRP_KEY = "tuiMetaField"; @@ -94,37 +93,125 @@ public abstract class UmlsToSnomedConsum protected abstract Set getSnomedCodes( final String umlsCode ) throws SQLException, DictionaryException; - public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException { +// /** +// * {@inheritDoc} +// */ +// @Override +// public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException { +// try { +// final String cuiPropKey = props.getProperty( CUI_MF_PRP_KEY ); +// final String tuiPropKey = props.getProperty( TUI_MF_PRP_KEY ); +// final Iterator hitsByOffsetItr = organizeByOffset( lhItr ); +// while ( hitsByOffsetItr.hasNext() ) { +// final Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next(); +// +// // Iterate over the LookupHit objects and group Snomed codes by NE type +// // For each NE type for which there is a hit, get all the Snomed codes +// // that map to the given CUI. +// +// // Use key "cui,tui" to avoid duplicates at this offset +// final Set cuiTuiSet = new HashSet(); +// +// // key = type of named entity (java.lang.Integer) +// // val = set of UmlsConcept objects (java.util.Set) +// final Map> conceptMap = new HashMap>(); +// +// final Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator(); +// int neBegin = -1; +// int neEnd = -1; +// while ( lhAtOffsetItr.hasNext() ) { +// final LookupHit lh = (LookupHit) lhAtOffsetItr.next(); +// neBegin = lh.getStartOffset(); +// neEnd = lh.getEndOffset(); +// +// final MetaDataHit mdh = lh.getDictMetaDataHit(); +// final String cui = mdh.getMetaFieldValue( cuiPropKey ); +// final String tui = mdh.getMetaFieldValue( tuiPropKey ); +// +// //String text = lh.getDictMetaDataHit().getMetaFieldValue("text"); +// if ( !_validTuiSet.contains( tui ) ) { +// continue; +// } +// final String cuiTuiKey = getUniqueKey( cui, tui ); +// if ( cuiTuiSet.contains( cuiTuiKey ) ) { +// continue; +// } +// cuiTuiSet.add( cuiTuiKey ); +// final Set snomedCodeSet = getSnomedCodes( cui ); +// if ( !snomedCodeSet.isEmpty() ) { +// final Integer neType = getNamedEntityType( tui ); +// Set conceptSet; +// if ( conceptMap.containsKey( neType ) ) { +// conceptSet = conceptMap.get( neType ); +// } else { +// conceptSet = new HashSet(); +// } +// final Collection conceptCol = createConceptCol( jcas, cui, tui, snomedCodeSet ); +// conceptSet.addAll( conceptCol ); +// conceptMap.put( neType, conceptSet ); +// } +// } +// +// final Collection conceptKeys = conceptMap.keySet(); +// for ( Integer conceptKey : conceptKeys ) { +// final Set conceptSet = conceptMap.get( conceptKey ); +// +// // Skip updating CAS if all Concepts for this type were filtered out +// // for this span. +// if ( !conceptSet.isEmpty() ) { +// FSArray conceptArr = new FSArray( jcas, conceptSet.size() ); +// int arrIdx = 0; +// for ( UmlsConcept umlsConcept : conceptSet ) { +// conceptArr.set( arrIdx, umlsConcept ); +// arrIdx++; +// } +// +// IdentifiedAnnotation neAnnot; +// if ( conceptKey == CONST.NE_TYPE_ID_DRUG ) { +// neAnnot = new MedicationEventMention( jcas ); +// } else { +// neAnnot = new EntityMention( jcas ); +// } +// neAnnot.setTypeID( conceptKey ); +// neAnnot.setBegin( neBegin ); +// neAnnot.setEnd( neEnd ); +// neAnnot.setDiscoveryTechnique( CONST.NE_DISCOVERY_TECH_DICT_LOOKUP ); +// neAnnot.setOntologyConceptArr( conceptArr ); +// neAnnot.addToIndexes(); +// } +// } +// } +// } catch ( Exception e ) { +// throw new AnalysisEngineProcessException( e ); +// } +// } + + + /** + * {@inheritDoc} + */ + @Override + public void consumeHits( final JCas jcas, final Iterator lhItr ) throws AnalysisEngineProcessException { try { final String cuiPropKey = props.getProperty( CUI_MF_PRP_KEY ); final String tuiPropKey = props.getProperty( TUI_MF_PRP_KEY ); - final Iterator hitsByOffsetItr = organizeByOffset( lhItr ); - while ( hitsByOffsetItr.hasNext() ) { - final Collection hitsAtOffsetCol = (Collection) hitsByOffsetItr.next(); - - // Iterate over the LookupHit objects and group Snomed codes by NE type - // For each NE type for which there is a hit, get all the Snomed codes - // that map to the given CUI. - + final Map> lookupHitMap = createLookupHitMap( lhItr ); + // iterate over the LookupHit objects + for ( Map.Entry> entry : lookupHitMap.entrySet() ) { + // code is only valid if the covered text is also present in the filter + final int neBegin = entry.getKey().__start; + final int neEnd = entry.getKey().__end; // Use key "cui,tui" to avoid duplicates at this offset final Set cuiTuiSet = new HashSet(); - - // key = type of named entity (java.lang.Integer) - // val = set of UmlsConcept objects (java.util.Set) + // key = type of named entity, val = set of UmlsConcept objects final Map> conceptMap = new HashMap>(); - - final Iterator lhAtOffsetItr = hitsAtOffsetCol.iterator(); - int neBegin = -1; - int neEnd = -1; - while ( lhAtOffsetItr.hasNext() ) { - final LookupHit lh = (LookupHit) lhAtOffsetItr.next(); - neBegin = lh.getStartOffset(); - neEnd = lh.getEndOffset(); - - final MetaDataHit mdh = lh.getDictMetaDataHit(); + // Iterate over the LookupHit objects and group Snomed codes by NE type + // For each NE type for which there is a hit, get all the Snomed codes + // that map to the given CUI. + for ( LookupHit lookupHit : entry.getValue() ) { + final MetaDataHit mdh = lookupHit.getDictMetaDataHit(); final String cui = mdh.getMetaFieldValue( cuiPropKey ); final String tui = mdh.getMetaFieldValue( tuiPropKey ); - //String text = lh.getDictMetaDataHit().getMetaFieldValue("text"); if ( !_validTuiSet.contains( tui ) ) { continue; @@ -142,21 +229,18 @@ public abstract class UmlsToSnomedConsum conceptSet = conceptMap.get( neType ); } else { conceptSet = new HashSet(); + conceptMap.put( neType, conceptSet ); } final Collection conceptCol = createConceptCol( jcas, cui, tui, snomedCodeSet ); conceptSet.addAll( conceptCol ); - conceptMap.put( neType, conceptSet ); } } - final Collection conceptKeys = conceptMap.keySet(); - for ( Integer conceptKey : conceptKeys ) { - final Set conceptSet = conceptMap.get( conceptKey ); - - // Skip updating CAS if all Concepts for this type were filtered out - // for this span. + for ( Map.Entry> conceptEntry : conceptMap.entrySet() ) { + final Set conceptSet = conceptEntry.getValue(); + // Skip updating CAS if all Concepts for this type were filtered out for this span. if ( !conceptSet.isEmpty() ) { - FSArray conceptArr = new FSArray( jcas, conceptSet.size() ); + final FSArray conceptArr = new FSArray( jcas, conceptSet.size() ); int arrIdx = 0; for ( UmlsConcept umlsConcept : conceptSet ) { conceptArr.set( arrIdx, umlsConcept ); @@ -164,6 +248,7 @@ public abstract class UmlsToSnomedConsum } IdentifiedAnnotation neAnnot; + final int conceptKey = conceptEntry.getKey(); if ( conceptKey == CONST.NE_TYPE_ID_DRUG ) { neAnnot = new MedicationEventMention( jcas ); } else { @@ -183,6 +268,7 @@ public abstract class UmlsToSnomedConsum } } + private int getNamedEntityType( final String tui ) throws IllegalArgumentException { if ( _medicationSet.contains( tui ) ) { return CONST.NE_TYPE_ID_DRUG;