ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1487583 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java
Date Wed, 29 May 2013 18:07:58 GMT
Author: dligach
Date: Wed May 29 18:07:57 2013
New Revision: 1487583

URL: http://svn.apache.org/r1487583
Log:
Baseline for degree_of requested by reviewer 2 (second round of reviews)

Added:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java

Added: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java?rev=1487583&view=auto
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java
(added)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3DegreeOfRelationExtractorAnnotator.java
Wed May 29 18:07:57 2013
@@ -0,0 +1,150 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae.baselines;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.base.Functions;
+import com.google.common.collect.Ordering;
+
+/**
+ * This baseline links each modifier with the closest entity of a type  
+ * that's suitable for degree_of, as long as there is no intervening modifier. 
+ */
+public class Baseline3DegreeOfRelationExtractorAnnotator extends RelationExtractorAnnotator
{
+	
+	@Override
+	public Class<? extends Annotation> getCoveringClass(){
+		return Sentence.class;
+	}
+	
+	@Override
+	public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+			JCas identifiedAnnotationView, Annotation sentence) {
+
+		List<EntityMention> entityMentions = JCasUtil.selectCovered(
+				identifiedAnnotationView,
+				EntityMention.class,
+				sentence);
+
+		List<Modifier> modifiers = JCasUtil.selectCovered(
+		    identifiedAnnotationView,
+		    Modifier.class,
+		    sentence);
+		
+		List<EntityMention> entitiesSuitableForDegreeOf = getEntitiesSuitableForDegreeOf(entityMentions);
+		
+		if((entitiesSuitableForDegreeOf.size() < 1) || (modifiers.size() < 1)) {
+		  return new ArrayList<IdentifiedAnnotationPair>();
+		}
+		
+		List<IdentifiedAnnotationPair> result = new ArrayList<IdentifiedAnnotationPair>();
+		Set<EntityMention> alreadyLinked = new HashSet<EntityMention>();
+		
+		for(Modifier modifier : modifiers) {
+		  EntityMention nearestEntity = getNearestEntity(identifiedAnnotationView, modifier, entitiesSuitableForDegreeOf);
+		  
+		  // don't link if there's an another modifier between this one and its nearest entity
+		  if(checkForModifierBetween(identifiedAnnotationView, modifier, nearestEntity)) {
+		    continue;
+		  }
+		  
+		  // make sure this entity isn't already linked to an anatomical site
+		  if(! alreadyLinked.contains(nearestEntity)) {
+		    result.add(new IdentifiedAnnotationPair(nearestEntity, modifier));
+		    alreadyLinked.add(nearestEntity);
+		  }
+		}
+
+    return result;
+	}
+	
+  /*
+   * Return entity mentions that qualityf to be the arg1 of degree_of relation (i.e. 2, 3)
+   */
+  private static List<EntityMention> getEntitiesSuitableForDegreeOf(List<EntityMention>
entityMentions) {
+    
+    HashSet<Integer> okArg1Types = new HashSet<Integer>(Arrays.asList(2, 3));
+    List<EntityMention> suitableEntities = new ArrayList<EntityMention>();
+    
+    for(EntityMention entityMention : entityMentions) {
+      if(okArg1Types.contains(entityMention.getTypeID())) {
+        suitableEntities.add(entityMention);
+      }
+    }
+    
+    return suitableEntities;
+  }
+	
+  /*
+   * Find the entity nearest to the modifier
+   */
+	private static EntityMention getNearestEntity(JCas jCas, Modifier modifier, List<EntityMention>
entityMentions) {
+
+	  // token distance from modifier to other entity mentions
+	  Map<EntityMention, Integer> distanceToEntities = new HashMap<EntityMention, Integer>();
+
+	  for(EntityMention entityMention : entityMentions) {
+	    List<BaseToken> baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, modifier,
entityMention);
+	    distanceToEntities.put(entityMention, baseTokens.size());
+	  }
+	  
+    List<EntityMention> sortedEntityMentions = new ArrayList<EntityMention>(distanceToEntities.keySet());
+    Function<EntityMention, Integer> getValue = Functions.forMap(distanceToEntities);
+    Collections.sort(sortedEntityMentions, Ordering.natural().onResultOf(getValue));
+    
+    return sortedEntityMentions.get(0);
+	}
+  
+  /*
+   * Return true if there's a modifier between the given modifier and an entity.
+   */
+  private static boolean checkForModifierBetween(JCas jCas, Modifier modifier, EntityMention
entity) {
+    
+    List<Modifier> modifiers = JCasUtil.selectBetween(jCas, Modifier.class, modifier,
entity);
+    if(modifiers.size() > 0) {
+      return true;
+    } 
+
+    return false;
+  }
+  
+  @Override
+  public String classify(List<Feature> features) {
+    return "degree_of";
+  }
+}



Mime
View raw message