Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 49966EDA3 for ; Wed, 29 May 2013 15:21:22 +0000 (UTC) Received: (qmail 5991 invoked by uid 500); 29 May 2013 15:21:22 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 5487 invoked by uid 500); 29 May 2013 15:21:20 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 5418 invoked by uid 99); 29 May 2013 15:21:19 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 May 2013 15:21:19 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 May 2013 15:21:17 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 61A732388847; Wed, 29 May 2013 15:20:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1487513 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/BaselineEntityMentionPairRelationExtractorAnnotator.java Date: Wed, 29 May 2013 15:20:58 -0000 To: commits@ctakes.apache.org From: dligach@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130529152058.61A732388847@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dligach Date: Wed May 29 15:20:58 2013 New Revision: 1487513 URL: http://svn.apache.org/r1487513 Log: Baseline requested by reviewer 2 (second set of revisions) Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/BaselineEntityMentionPairRelationExtractorAnnotator.java Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/BaselineEntityMentionPairRelationExtractorAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/BaselineEntityMentionPairRelationExtractorAnnotator.java?rev=1487513&view=auto ============================================================================== --- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/BaselineEntityMentionPairRelationExtractorAnnotator.java (added) +++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/BaselineEntityMentionPairRelationExtractorAnnotator.java Wed May 29 15:20:58 2013 @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.relationextractor.ae.baselines; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator; +import org.apache.ctakes.typesystem.type.syntax.BaseToken; +import org.apache.ctakes.typesystem.type.textsem.EntityMention; +import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.cleartk.classifier.Feature; +import org.uimafit.util.JCasUtil; + +import com.google.common.base.Function; +import com.google.common.base.Functions; +import com.google.common.collect.Ordering; + +/** + * This baseline links each anatomical site with the closest entity of a type + * that's suitable for location_of, as long as there is no intervening anatomical site. + */ +public class BaselineEntityMentionPairRelationExtractorAnnotator extends RelationExtractorAnnotator { + + @Override + public Class getCoveringClass(){ + return Sentence.class; + } + + @Override + public List getCandidateRelationArgumentPairs( + JCas identifiedAnnotationView, Annotation sentence) { + + List entityMentions = JCasUtil.selectCovered( + identifiedAnnotationView, + EntityMention.class, + sentence); + + List anatomicalSites = getAnatomicalSites(entityMentions); + List entitiesSuitableForLocationOf = getEntitiesSuitableForLocationOf(entityMentions); + + if((anatomicalSites.size() < 1) || (entitiesSuitableForLocationOf.size() < 1)) { + return new ArrayList(); + } + + List result = new ArrayList(); + Set alreadyLinked = new HashSet(); + + for(EntityMention anatomicalSite : anatomicalSites) { + EntityMention nearestEntity = getNearestEntity(identifiedAnnotationView, anatomicalSite, entitiesSuitableForLocationOf); + + // don't link if there's an another anatomical site between this one and its nearest entity + if(checkForAnatomicalSiteBetween(identifiedAnnotationView, anatomicalSite, nearestEntity)) { + continue; + } + + // make sure this entity isn't already linked to an anatomical site + if(! alreadyLinked.contains(nearestEntity)) { + result.add(new IdentifiedAnnotationPair(anatomicalSite, nearestEntity)); + alreadyLinked.add(nearestEntity); + } + } + + return result; + } + + /* + * Return entity mentions that are anatomical types (i.e. typeId is 6) + */ + private static List getAnatomicalSites(List entityMentions) { + + List anatomicalSites = new ArrayList(); + + for(EntityMention entityMention : entityMentions) { + if(entityMention.getTypeID() == 6) { + anatomicalSites.add(entityMention); + } + } + + return anatomicalSites; + } + + /* + * Return entity mentions that qualityf to be the arg2 of location_of relation (i.e. 2, 3, or 5) + */ + private List getEntitiesSuitableForLocationOf(List entityMentions) { + + HashSet okArg2Types = new HashSet(Arrays.asList(2, 3, 5)); + List suitableEntities = new ArrayList(); + + for(EntityMention entityMention : entityMentions) { + if(okArg2Types.contains(entityMention.getTypeID())) { + suitableEntities.add(entityMention); + } + } + + return suitableEntities; + } + + /* + * Find the entity nearest to the anatomical site + */ + private static EntityMention getNearestEntity(JCas jCas, EntityMention anatomicalSite, List entityMentions) { + + // token distance from anatomical site to other entity mentions + Map distanceToEntities = new HashMap(); + + for(EntityMention entityMention : entityMentions) { + List baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, anatomicalSite, entityMention); + distanceToEntities.put(entityMention, baseTokens.size()); + } + + List sortedEntityMentions = new ArrayList(distanceToEntities.keySet()); + Function getValue = Functions.forMap(distanceToEntities); + Collections.sort(sortedEntityMentions, Ordering.natural().onResultOf(getValue)); + + return sortedEntityMentions.get(0); + } + + /* + * Return true if there's an anatomical site entity mention between two entities. + */ + private static boolean checkForAnatomicalSiteBetween(JCas jCas, EntityMention entity1, EntityMention entity2) { + + for(EntityMention entityMention : JCasUtil.selectBetween(jCas, EntityMention.class, entity1, entity2)) { + if(entityMention.getTypeID() == 6) { + return true; + } + } + + return false; + } + + @Override + public String classify(List features) { + return "location_of"; + } +}