incubator-ctakes-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Miller, Timothy" <Timothy.Mil...@childrens.harvard.edu>
Subject generify relation extractor annotator
Date Thu, 13 Dec 2012 12:56:38 GMT
Any interest in generifying the relation extractor to make it easier to instantiate all relation
extractors as ClearTK-style annotators?  This would make it easier to share feature extractors,
allow me to rewrite the coreference code base, and make it easier to contribute to multiple
projects without learning a bunch of different implementations.

For example, the quick mock-up patch below (which does not yet compile cleanly; see the TODOs) changes
the abstract RelationExtractorAnnotator class to be parameterized with three types: a spanning type
(the annotation within whose span the relation is sought, e.g. a sentence or a document), and the
Arg1 and Arg2 types.  All three type parameters extend Annotation.

So the EntityMentionPairExtractor would extend RelationExtractorAnnotator<Sentence,IdentifiedAnnotation,IdentifiedAnnotation>,
coreference would extend with the args <Document,IdentifiedAnnotation,IdentifiedAnnotation>,
DegreeOf would extend with args <Sentence,IdentifiedAnnotation,Modifier>,
and so on.

The other aspect is the feature extractors, which are currently defined in the abstract class.
However, they are protected, so we could just keep default features in this class and expect
children to define their own features.  Alternatively, we could make defining them an abstract
method so that subclasses have to explicitly enumerate their features.

Any thoughts? It is early in the morning so let me know if I missed something obvious.

Tim


Index: src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java
===================================================================
--- src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java	(revision
1421247)
+++ src/main/java/org/apache/ctakes/relationextractor/ae/RelationExtractorAnnotator.java	(working
copy)
@@ -49,7 +49,7 @@
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;

-public abstract class RelationExtractorAnnotator extends CleartkAnnotator<String> {
+public abstract class RelationExtractorAnnotator<SPANNING_TYPE extends Annotation,A1TYPE
extends Annotation,A2TYPE extends Annotation> extends CleartkAnnotator<String> {

  public static final String NO_RELATION_CATEGORY = "-NONE-";

@@ -94,7 +94,7 @@
  /**
   * Selects the relevant mentions/annotations within a sentence for relation identification/extraction
   */
-  public abstract List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(JCas
identifiedAnnotationView, Sentence sentence);
+  public abstract List<IdentifiedAnnotationPair<A1TYPE,A2TYPE>> getCandidateRelationArgumentPairs(JCas
identifiedAnnotationView, SPANNING_TYPE span);

  /*
   * Implement the standard UIMA process method.
@@ -122,15 +122,16 @@
    }

    // walk through each sentence in the text
-    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
+    // TODO - causes compiler error, didn't think about how to fix this yet
+    for (SPANNING_TYPE span : JCasUtil.select(jCas, SPANNING_TYPE.class)) {

    	// collect all relevant relation arguments from the sentence
-    	List<IdentifiedAnnotationPair> candidatePairs = this.getCandidateRelationArgumentPairs(identifiedAnnotationView,
sentence);
+    	List<IdentifiedAnnotationPair<A1TYPE,A2TYPE>> candidatePairs = this.getCandidateRelationArgumentPairs(identifiedAnnotationView,
span);

    	// walk through the pairs of annotations
-    	for (IdentifiedAnnotationPair pair : candidatePairs) {
-    		IdentifiedAnnotation arg1 = pair.getArg1();
-    		IdentifiedAnnotation arg2 = pair.getArg2();
+    	for (IdentifiedAnnotationPair<A1TYPE,A2TYPE> pair : candidatePairs) {
+    		A1TYPE arg1 = pair.getArg1();
+    		A2TYPE arg2 = pair.getArg2();
    		// apply all the feature extractors to extract the list of features
    		List<Feature> features = new ArrayList<Feature>();
    		for (RelationFeaturesExtractor extractor : this.featureExtractors) {
@@ -162,9 +163,10 @@
    			if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {

    				// if we predict an inverted relation, reverse the order of the arguments
+    				// TODO - only makes sense if TYPE1 and TYPE2 are the same...maybe another parameter?
also causes compiler error in this state
    				if (predictedCategory.endsWith("-1")) {
    					predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
-    					IdentifiedAnnotation temp = arg1;
+    					A1TYPE temp = arg1;
    					arg1 = arg2;
    					arg2 = temp;
    				}
@@ -198,7 +200,7 @@
   *         otherwise it returns the label sent to the datawriter
   */
  protected abstract String getRelationCategory(Map<List<Annotation>, BinaryTextRelation>
relationLookup,
-		  IdentifiedAnnotation arg1, IdentifiedAnnotation arg2);
+		  A1TYPE arg1, A2TYPE arg2);

  /**
   * Creates a lookup map between lists of arguments and their relation
@@ -225,17 +227,17 @@
	  return relationLookup;
  }

-  public static class IdentifiedAnnotationPair {
+  public static class IdentifiedAnnotationPair<A1TYPE,A2TYPE> {
	  
-	 private final IdentifiedAnnotation arg1;
-	 private final IdentifiedAnnotation arg2;
-	 public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+	 private final A1TYPE arg1;
+	 private final A2TYPE arg2;
+	 public IdentifiedAnnotationPair(A1TYPE arg1, A2TYPE arg2) {
		 this.arg1 = arg1;
		 this.arg2 = arg2;
	 }
	 
-	 public final IdentifiedAnnotation getArg1() { return arg1; }
+	 public final A1TYPE getArg1() { return arg1; }
		 
-	 public final IdentifiedAnnotation getArg2() { return arg2; }
+	 public final A2TYPE getArg2() { return arg2; }
  }
}
Index: src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
===================================================================
--- src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
(revision 1421247)
+++ src/main/java/org/apache/ctakes/relationextractor/ae/features/RelationFeaturesExtractor.java
(working copy)
@@ -22,10 +22,9 @@

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.classifier.Feature;

-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-
/**
 * Define an interface for people to implement feature extractors.
 */
@@ -42,6 +41,6 @@
   *          The second identified annotation in the text.
   * @return A list of features indicative of the relation between the named entities
   */
-  public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation
arg2)
+  public List<Feature> extract(JCas jCas, Annotation arg1, Annotation arg2)
      throws AnalysisEngineProcessException;
}
\ No newline at end of file


Mime
View raw message