incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1433586 - /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/
Date Tue, 15 Jan 2013 19:14:10 GMT
Author: dligach
Date: Tue Jan 15 19:14:10 2013
New Revision: 1433586

URL: http://svn.apache.org/viewvc?rev=1433586&view=rev
Log:
Added baseline 2 for degree_of

Added:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2DegreeOfRelationExtractorAnnotator.java
Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java?rev=1433586&r1=1433585&r2=1433586&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java
Tue Jan 15 19:14:10 2013
@@ -36,7 +36,8 @@ import org.cleartk.classifier.Feature;
 import org.uimafit.util.JCasUtil;
 
 /**
- * 
+ * Annotated degree_of relation in sentences containing a single entity mention
+ * of a valid degree_of type and a single modifier.
  */
 public class Baseline1DegreeOfRelationExtractorAnnotator extends RelationExtractorAnnotator
{
 

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2DegreeOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2DegreeOfRelationExtractorAnnotator.java?rev=1433586&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2DegreeOfRelationExtractorAnnotator.java
(added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2DegreeOfRelationExtractorAnnotator.java
Tue Jan 15 19:14:10 2013
@@ -0,0 +1,123 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae.baselines;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.CleartkProcessingException;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.base.Function;
+import com.google.common.base.Functions;
+import com.google.common.collect.Ordering;
+
+
+/**
+ * Annotated degree_of relation between two entities in sentences with multiple modifiers
(arg2)
+ * and a single legitimate degree_of arg1 (i.e. disease/disorder or sign/symptom). Use the
pair of
+ * arguments that have the shortest distance to each other. 
+ */
+public class Baseline2DegreeOfRelationExtractorAnnotator extends RelationExtractorAnnotator
{
+
+	@Override
+	public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+			JCas identifiedAnnotationView, Sentence sentence) {
+		
+		List<EntityMention> entities = JCasUtil.selectCovered(identifiedAnnotationView, EntityMention.class,
sentence);
+		List<Modifier> modifiers = JCasUtil.selectCovered(identifiedAnnotationView, Modifier.class,
sentence);
+		
+    // look for sentences with multiple modifiers/arg2s and a single entity/arg1
+		if(! (modifiers.size() > 1 && entities.size() == 1)) {
+		  return new ArrayList<IdentifiedAnnotationPair>();
+		}
+		
+		// TODO: it seems like we never get here
+		//       i.e. no sentences with multiple modifiers and a single entity
+		
+		List<IdentifiedAnnotationPair> pairs = new ArrayList<IdentifiedAnnotationPair>();
+		for (EntityMention entity : entities) {
+			for (Modifier modifier : modifiers) {
+				pairs.add(new IdentifiedAnnotationPair(entity, modifier));
+			}
+		}
+		
+    // compute distance between entities for the pairs where entity types are correct
+    HashMap<IdentifiedAnnotationPair, Integer> distanceLookup = new HashMap<IdentifiedAnnotationPair,
Integer>();
+    for(IdentifiedAnnotationPair pair : pairs) {
+      if(Utils.validateDegreeOfArgumentTypes(pair)) {
+        try {
+          int distance = Utils.getDistance(identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA),
pair);
+          distanceLookup.put(pair, distance);
+        } catch (CASException e) {
+          System.out.println("couldn't get default sofa");
+          break;
+        }
+      } 
+    }
+    if(distanceLookup.isEmpty()) {
+      return new ArrayList<IdentifiedAnnotationPair>(); // no pairs with suitable argument
types
+    }
+
+    // find the pair where the distance between entities is the smallest and return it
+    List<IdentifiedAnnotationPair> rankedPairs = new ArrayList<IdentifiedAnnotationPair>(distanceLookup.keySet());
+    Function<IdentifiedAnnotationPair, Integer> getValue = Functions.forMap(distanceLookup);
+    Collections.sort(rankedPairs, Ordering.natural().onResultOf(getValue));
+
+    List<IdentifiedAnnotationPair> result = new ArrayList<IdentifiedAnnotationPair>();
+    result.add(rankedPairs.get(0));
+
+    System.out.println(sentence.getCoveredText());
+    System.out.println("arg1: " + result.get(0).getArg1().getCoveredText());
+    System.out.println("arg2: " + result.get(0).getArg2().getCoveredText());
+    System.out.println();
+    
+    return result;
+	}
+	
+	
+
+	@Override
+	protected String getRelationCategory(
+			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+		BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+		return (relation != null) ? relation.getCategory() : NO_RELATION_CATEGORY;
+	}
+
+  @Override
+  public String classify(List<Feature> features) throws CleartkProcessingException
{
+    return "degree_of";
+  }
+}

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java?rev=1433586&r1=1433585&r2=1433586&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java
(original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java
Tue Jan 15 19:14:10 2013
@@ -27,9 +27,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
 import org.apache.ctakes.typesystem.type.textspan.Sentence;
@@ -46,7 +44,7 @@ import com.google.common.base.Functions;
 import com.google.common.collect.Ordering;
 
 /**
- * Annotate location_of relation between two entities in sentences with multiple anatomica
sites
+ * Annotate location_of relation between two entities in sentences with multiple anatomical
sites
  * and a single legitimate location_of arg2. Use the pair of arguments that are the closest
to each other.
  * This implementation assumes classifyBothDirections = true.
  */
@@ -110,10 +108,11 @@ public class Baseline2EntityMentionPairR
 		for(IdentifiedAnnotationPair pair : pairs) {
 		  if(Utils.validateLocationOfArgumentTypes(pair)) {
 		    try {
-          int distance = getDistance(identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA),
pair);
+          int distance = Utils.getDistance(identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA),
pair);
           distanceLookup.put(pair, distance);
         } catch (CASException e) {
-          e.printStackTrace();
+          System.out.println("couldn't get default sofa");
+          break;
         }
 		  } 
 		}
@@ -138,15 +137,6 @@ public class Baseline2EntityMentionPairR
     return result;
 	}
 	
-	/* 
-	 * Calculate the distance (in tokens) between two identified annotations.
-	 */
-	private static int getDistance(JCas jCas, IdentifiedAnnotationPair pair)  {
-	  
-	  List<BaseToken> baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, pair.getArg1(),
pair.getArg2());
-	  return baseTokens.size();
-	}
-	
 	@Override
 	protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation>
relationLookup,
 			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java?rev=1433586&r1=1433585&r2=1433586&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java
(original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java
Tue Jan 15 19:14:10 2013
@@ -2,9 +2,13 @@ package org.apache.ctakes.relationextrac
 
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.List;
 
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.jcas.JCas;
+import org.uimafit.util.JCasUtil;
 
 public class Utils {
 
@@ -37,7 +41,7 @@ public class Utils {
    * Are entity types of the arguments valid for degree_of relation? 
    * The following are the valid combinations:
    * 
-   * degree-of(disorder/2, modifier)
+   * degree-of(disease/disorder/2, modifier)
    * degree-of(sign/symptom/3, modifier)
    */
   public static boolean validateDegreeOfArgumentTypes(IdentifiedAnnotationPair pair) {
@@ -46,14 +50,21 @@ public class Utils {
     HashSet<Integer> okArg1Types = new HashSet<Integer>(Arrays.asList(2, 3));
 
     IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be either disease/disorder
or sign/symptom
-    IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be a modifier)
     int type1 = arg1.getTypeID();
-    int type2 = arg2.getTypeID();
 
     if(okArg1Types.contains(type1)) {
-      return true; // assume arg2 is a moddifier
+      return true; // assume arg2 is a modifier
     }
 
     return false;
   }
+  
+  /** 
+   * Calculate the distance (in tokens) between two identified annotations.
+   */
+  public static int getDistance(JCas jCas, IdentifiedAnnotationPair pair)  {
+    
+    List<BaseToken> baseTokens = JCasUtil.selectBetween(jCas, BaseToken.class, pair.getArg1(),
pair.getArg2());
+    return baseTokens.size();
+  }
 }



Mime
View raw message