incubator-ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1433160 - in /incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/baselines/ eval/
Date Mon, 14 Jan 2013 21:52:24 GMT
Author: dligach
Date: Mon Jan 14 21:52:23 2013
New Revision: 1433160

URL: http://svn.apache.org/viewvc?rev=1433160&view=rev
Log:
initial version of baseline1 for degree_of

Added:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java
Modified:
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1EntityMentionPairRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java
    incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java?rev=1433160&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1DegreeOfRelationExtractorAnnotator.java Mon Jan 14 21:52:23 2013
@@ -0,0 +1,78 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.relationextractor.ae.baselines;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.CleartkProcessingException;
+import org.cleartk.classifier.Feature;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * 
+ */
+public class Baseline1DegreeOfRelationExtractorAnnotator extends RelationExtractorAnnotator {
+
+	@Override
+	public List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+			JCas identifiedAnnotationView, Sentence sentence) {
+		
+		List<EntityMention> entities = JCasUtil.selectCovered(identifiedAnnotationView, EntityMention.class, sentence);
+		List<Modifier> modifiers = JCasUtil.selectCovered(identifiedAnnotationView, Modifier.class, sentence);
+		
+		// look for sentences with one entity and one modifier
+    List<IdentifiedAnnotationPair> result = new ArrayList<IdentifiedAnnotationPair>();
+		if((entities.size() == 1) && (modifiers.size() == 1)) {
+		  IdentifiedAnnotationPair pair = new IdentifiedAnnotationPair(entities.get(0), modifiers.get(0));
+		  if(Utils.validateDegreeOfArgumentTypes(pair)) {
+		    System.out.println(sentence.getCoveredText());
+		    System.out.println("arg1: " + pair.getArg1().getCoveredText());
+		    System.out.println("arg2: " + pair.getArg2().getCoveredText());
+		    System.out.println();
+		    result.add(pair);
+		  }
+		}
+				
+		return result;
+	}
+
+	@Override
+	protected String getRelationCategory(
+			Map<List<Annotation>, BinaryTextRelation> relationLookup,
+			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+		BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+		return (relation != null) ? relation.getCategory() : NO_RELATION_CATEGORY;
+	}
+
+  @Override
+  public String classify(List<Feature> features) throws CleartkProcessingException {
+    return "degree_of";
+  }
+}

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1EntityMentionPairRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1EntityMentionPairRelationExtractorAnnotator.java?rev=1433160&r1=1433159&r2=1433160&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1EntityMentionPairRelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline1EntityMentionPairRelationExtractorAnnotator.java Mon Jan 14 21:52:23 2013
@@ -20,12 +20,10 @@ package org.apache.ctakes.relationextrac
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -87,7 +85,7 @@ public class Baseline1EntityMentionPairR
 		  // there are two entities in this sentence
 		  // are they of suitable types for location_of?
 		  for(IdentifiedAnnotationPair pair : pairs) {
-		    if(validateArgumentTypes(pair)) {
+		    if(Utils.validateLocationOfArgumentTypes(pair)) {
 	        System.out.println(sentence.getCoveredText());
 	        System.out.println("arg1: " + pair.getArg1().getCoveredText());
 	        System.out.println("arg2: " + pair.getArg2().getCoveredText());
@@ -104,32 +102,7 @@ public class Baseline1EntityMentionPairR
 		// for all other cases, return no entity pairs
 		return new ArrayList<IdentifiedAnnotationPair>();
 	}
-	
-	/*
-	 * Are entity types of the arguments valid for location_of? 
-	 * The following combinations are allowed:
-	 * 
-	 * location-of(anatomical site/6, disorder/2)
-   * location-of(anatomical site/6, sign/symptom/3)
-   * location-of(anatomical site/6, procedure/5)
-	 */
-	private static boolean validateArgumentTypes(IdentifiedAnnotationPair pair) {
-	  
-    // allowable arg2 types for location_of
-    HashSet<Integer> okArg2Types = new HashSet<Integer>(Arrays.asList(2, 3, 5));
-    
-	  IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be anatomical site)
-	  IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be either disorder, sign/symptom, or procedure)
-	  int type1 = arg1.getTypeID();
-	  int type2 = arg2.getTypeID();
-	  
-	  if(type1 == 6 && okArg2Types.contains(type2)) {
-	    return true;
-	  }
-	  
-	  return false;
-	}
-	
+		
 	@Override
 	protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
 			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java?rev=1433160&r1=1433159&r2=1433160&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline2EntityMentionPairRelationExtractorAnnotator.java Mon Jan 14 21:52:23 2013
@@ -108,7 +108,7 @@ public class Baseline2EntityMentionPairR
 		// compute distance between entities for the pairs where entity types are correct
 		HashMap<IdentifiedAnnotationPair, Integer> distanceLookup = new HashMap<IdentifiedAnnotationPair, Integer>();
 		for(IdentifiedAnnotationPair pair : pairs) {
-		  if(validateArgumentTypes(pair)) {
+		  if(Utils.validateLocationOfArgumentTypes(pair)) {
 		    try {
           int distance = getDistance(identifiedAnnotationView.getView(CAS.NAME_DEFAULT_SOFA), pair);
           distanceLookup.put(pair, distance);
@@ -147,31 +147,6 @@ public class Baseline2EntityMentionPairR
 	  return baseTokens.size();
 	}
 	
-	/*
-	 * Are entity types of the arguments valid for location_of? 
-	 * The following combinations are allowed:
-	 * 
-	 * location-of(anatomical site/6, disorder/2)
-   * location-of(anatomical site/6, sign/symptom/3)
-   * location-of(anatomical site/6, procedure/5)
-	 */
-	private static boolean validateArgumentTypes(IdentifiedAnnotationPair pair) {
-	  
-    // allowable arg2 types for location_of
-	  HashSet<Integer> okArg2Types = new HashSet<Integer>(Arrays.asList(2, 3, 5));
-    
-	  IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be anatomical site)
-	  IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be either disorder, sign/symptom, or procedure)
-	  int type1 = arg1.getTypeID();
-	  int type2 = arg2.getTypeID();
-	  
-	  if(type1 == 6 && okArg2Types.contains(type2)) {
-	    return true;
-	  }
-	  
-	  return false;
-	}
-	
 	@Override
 	protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
 			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java?rev=1433160&r1=1433159&r2=1433160&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Baseline3EntityMentionPairRelationExtractorAnnotator.java Mon Jan 14 21:52:23 2013
@@ -20,12 +20,10 @@ package org.apache.ctakes.relationextrac
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
 import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
 import org.apache.ctakes.typesystem.type.textsem.EntityMention;
@@ -85,7 +83,7 @@ public class Baseline3EntityMentionPairR
 		// find pairs enclosed inside a noun phrase
 		List<IdentifiedAnnotationPair> result = new ArrayList<IdentifiedAnnotationPair>();
 		for(IdentifiedAnnotationPair pair : pairs) {
-		  if(validateArgumentTypes(pair)) {
+		  if(Utils.validateLocationOfArgumentTypes(pair)) {
 		    for(TreebankNode nounPhrase : getNounPhrases(identifiedAnnotationView, sentence)) {
 		      if(isEnclosed(pair, nounPhrase)) {
 		        IdentifiedAnnotation arg1 = pair.getArg1();
@@ -148,31 +146,6 @@ public class Baseline3EntityMentionPairR
 	  return nounPhrases;	  
 	}
 	
-	/*
-	 * Are entity types of the arguments valid for location_of? 
-	 * The following combinations are allowed:
-	 * 
-	 * location-of(anatomical site/6, disorder/2)
-   * location-of(anatomical site/6, sign/symptom/3)
-   * location-of(anatomical site/6, procedure/5)
-	 */
-	private static boolean validateArgumentTypes(IdentifiedAnnotationPair pair) {
-	  
-    // allowable arg2 types for location_of
-    HashSet<Integer> okArg2Types = new HashSet<Integer>(Arrays.asList(2, 3, 5));
-    
-	  IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be anatomical site)
-	  IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be either disorder, sign/symptom, or procedure)
-	  int type1 = arg1.getTypeID();
-	  int type2 = arg2.getTypeID();
-	  
-	  if(type1 == 6 && okArg2Types.contains(type2)) {
-	    return true;
-	  }
-	  
-	  return false;
-	}
-	
 	@Override
 	protected String getRelationCategory(Map<List<Annotation>, BinaryTextRelation> relationLookup,
 			IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {

Added: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java?rev=1433160&view=auto
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java (added)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/baselines/Utils.java Mon Jan 14 21:52:23 2013
@@ -0,0 +1,59 @@
+package org.apache.ctakes.relationextractor.ae.baselines;
+
+import java.util.Arrays;
+import java.util.HashSet;
+
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator.IdentifiedAnnotationPair;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+
+public class Utils {
+
+  /**
+   * Are entity types of the arguments valid for location_of relation? 
+   * The following combinations are allowed:
+   * 
+   * location-of(anatomical site/6, disorder/2)
+   * location-of(anatomical site/6, sign/symptom/3)
+   * location-of(anatomical site/6, procedure/5)
+   */
+  public static boolean validateLocationOfArgumentTypes(IdentifiedAnnotationPair pair) {
+    
+    // allowable arg2 types for location_of
+    HashSet<Integer> okArg2Types = new HashSet<Integer>(Arrays.asList(2, 3, 5));
+    
+    IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be anatomical site)
+    IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be either disorder, sign/symptom, or procedure)
+    int type1 = arg1.getTypeID();
+    int type2 = arg2.getTypeID();
+    
+    if(type1 == 6 && okArg2Types.contains(type2)) {
+      return true;
+    }
+    
+    return false;
+  }
+
+  /**
+   * Are entity types of the arguments valid for degree_of relation? 
+   * The following are the valid combinations:
+   * 
+   * degree-of(disorder/2, modifier)
+   * degree-of(sign/symptom/3, modifier)
+   */
+  public static boolean validateDegreeOfArgumentTypes(IdentifiedAnnotationPair pair) {
+
+    // allowable arg1 types
+    HashSet<Integer> okArg1Types = new HashSet<Integer>(Arrays.asList(2, 3));
+
+    IdentifiedAnnotation arg1 = pair.getArg1(); // Argument (should be either disease/disorder or sign/symptom
+    IdentifiedAnnotation arg2 = pair.getArg2(); // Related_to (should be a modifier)
+    int type1 = arg1.getTypeID();
+    int type2 = arg2.getTypeID();
+
+    if(okArg1Types.contains(type1)) {
+      return true; // assume arg2 is a modifier
+    }
+
+    return false;
+  }
+}

Modified: incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java
URL: http://svn.apache.org/viewvc/incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1433160&r1=1433159&r2=1433160&view=diff
==============================================================================
--- incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original)
+++ incubator/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Mon Jan 14 21:52:23 2013
@@ -28,6 +28,14 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
+import org.apache.ctakes.relationextractor.ae.baselines.Baseline1DegreeOfRelationExtractorAnnotator;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.EntityMention;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Modifier;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
@@ -72,15 +80,6 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
 import com.google.common.collect.Sets;
 
-import org.apache.ctakes.relationextractor.ae.DegreeOfRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.EntityMentionPairRelationExtractorAnnotator;
-import org.apache.ctakes.relationextractor.ae.RelationExtractorAnnotator;
-import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
-import org.apache.ctakes.typesystem.type.relation.RelationArgument;
-import org.apache.ctakes.typesystem.type.textsem.EntityMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.Modifier;
-
 public class RelationExtractorEvaluation extends Evaluation_ImplBase<File, AnnotationStatistics<String>> {
 
   public static class Options extends Options_ImplBase {
@@ -138,7 +137,7 @@ public class RelationExtractorEvaluation
   
   // parameter settings currently optimized for SHARP data
   private static final ParameterSettings BEST_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 1.0f, "radial basis function", 10.0, 0.0010);
-  private static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(false, 1.0f, "radial basis function", 10.0, 0.01);
+  private static final ParameterSettings BEST_NON_DEGREE_OF_PARAMETERS = new ParameterSettings(true, 1.0f, "radial basis function", 10.0, 0.01);
   
   public static void main(String[] args) throws Exception {
     Options options = new Options();
@@ -162,7 +161,7 @@ public class RelationExtractorEvaluation
       // determine class for the classifier annotator
       boolean isDegreeOf = relationCategory.equals("degree_of");
       Class<? extends RelationExtractorAnnotator> annotatorClass = isDegreeOf
-          ? DegreeOfRelationExtractorAnnotator.class
+          ? Baseline1DegreeOfRelationExtractorAnnotator.class
           : EntityMentionPairRelationExtractorAnnotator.class;
 
       // determine the type of classifier to be trained



Mime
View raw message