ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From c...@apache.org
Subject svn commit: r1688869 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae: DocTimeRelAnnotator.java EventAnnotator.java EventEventRelationAnnotator.java EventTimeSelfRelationAnnotator.java TemporalRelationExtractorAnnotator.java
Date Thu, 02 Jul 2015 17:41:59 GMT
Author: clin
Date: Thu Jul  2 17:41:59 2015
New Revision: 1688869

URL: http://svn.apache.org/r1688869
Log:
add confidence scores for DocTimeRel, Event, and temporal relation predictions

Added:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java   (with props)
Modified:
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
    ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java Thu Jul  2 17:41:59 2015
@@ -26,6 +26,8 @@ import java.util.List;
 
 
 
+import java.util.Map;
+
 import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor;
 //import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor;
 import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor;
@@ -171,17 +173,28 @@ public class DocTimeRelAnnotator extends
       if (this.isTraining()) {
     	  if(eventMention.getEvent() != null){
     		  String outcome = eventMention.getEvent().getProperties().getDocTimeRel();
-    		  this.dataWriter.write(new Instance<String>(outcome, features));
+    		  this.dataWriter.write(new Instance<>(outcome, features));
     	  }
       } else {
-        String outcome = this.classifier.classify(features);
+//        String outcome = this.classifier.classify(features);
+        Map.Entry<String, Double> maxEntry = null;
+        for( Map.Entry<String, Double> entry: this.classifier.score(features).entrySet() ){
+        	if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){
+        		maxEntry = entry;
+        	}
+        }
+        
         if (eventMention.getEvent() == null) {
           Event event = new Event(jCas);
           eventMention.setEvent(event);
           EventProperties props = new EventProperties(jCas);
           event.setProperties(props);
         }
-        eventMention.getEvent().getProperties().setDocTimeRel(outcome);
+        if( maxEntry != null){
+        	eventMention.getEvent().getProperties().setDocTimeRel(maxEntry.getKey());
+        	eventMention.getEvent().setConfidence(maxEntry.getValue().floatValue());
+        	System.out.println("event confidence:"+maxEntry.getValue().floatValue());
+        }
       }
     }
   }

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Thu Jul  2 17:41:59 2015
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.Random;
 
 import org.apache.ctakes.temporal.ae.feature.ChunkingExtractor;
@@ -252,6 +253,7 @@ public class EventAnnotator extends Temp
 
       // during training, the list of all outcomes for the tokens
       List<String> outcomes;
+      List<Double> confidenceScores= new ArrayList<>();
       if (this.isTraining()) {
         List<EventMention> events = Lists.newArrayList();
         for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) {
@@ -354,14 +356,27 @@ public class EventAnnotator extends Temp
 
         // if predicting, add prediction to outcomes
         else {
-          outcomes.add(this.classifier.classify(features));
+//          outcomes.add(this.classifier.classify(features));
+          
+          Map.Entry<String, Double> maxEntry = null;
+          for( Map.Entry<String, Double> entry: this.classifier.score(features).entrySet() ){
+          	if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){
+          		maxEntry = entry;
+          	}
+          }
+          
+          outcomes.add(maxEntry.getKey());
+          confidenceScores.add(maxEntry.getValue());
         }
       }
 
       // during prediction, convert chunk labels to events and add them to the CAS
       if (!this.isTraining()) {
         List<EventMention> createdEvents = this.eventChunking.createChunks(jCas, tokens, outcomes);
+        int mentionidx =0;
         for(EventMention mention : createdEvents){
+          mention.setConfidence(confidenceScores.get(mentionidx).floatValue());
+          mentionidx++;
           if(mention.getEvent() == null){
             Event event = new Event(jCas);
             EventProperties props = new EventProperties(jCas);

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java Thu Jul  2 17:41:59 2015
@@ -81,7 +81,7 @@ import org.apache.uima.fit.util.JCasUtil
 
 import com.google.common.collect.Lists;
 
-public class EventEventRelationAnnotator extends RelationExtractorAnnotator {
+public class EventEventRelationAnnotator extends TemporalRelationExtractorAnnotator {
 
 	public static AnalysisEngineDescription createDataWriterDescription(
 			Class<? extends DataWriter<String>> dataWriterClass,
@@ -302,7 +302,7 @@ public class EventEventRelationAnnotator
 
 	@Override
 	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
-			IdentifiedAnnotation arg2, String predictedCategory) {
+			IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
 		RelationArgument relArg1 = new RelationArgument(jCas);
 		relArg1.setArgument(arg1);
 		relArg1.setRole("Arg1");
@@ -315,6 +315,7 @@ public class EventEventRelationAnnotator
 		relation.setArg1(relArg1);
 		relation.setArg2(relArg2);
 		relation.setCategory(predictedCategory);
+		relation.setConfidence(confidence);
 		relation.addToIndexes();
 	}
 

Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java (original)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java Thu Jul  2 17:41:59 2015
@@ -77,7 +77,7 @@ import org.apache.uima.fit.util.JCasUtil
 
 import com.google.common.collect.Lists;
 
-public class EventTimeSelfRelationAnnotator extends RelationExtractorAnnotator {
+public class EventTimeSelfRelationAnnotator extends TemporalRelationExtractorAnnotator {
 
 	public static AnalysisEngineDescription createDataWriterDescription(
 			Class<? extends DataWriter<String>> dataWriterClass,
@@ -195,7 +195,7 @@ public class EventTimeSelfRelationAnnota
 
 	@Override
 	protected void createRelation(JCas jCas, IdentifiedAnnotation arg1,
-			IdentifiedAnnotation arg2, String predictedCategory) {
+			IdentifiedAnnotation arg2, String predictedCategory, double confidence) {
 		RelationArgument relArg1 = new RelationArgument(jCas);
 		relArg1.setArgument(arg1);
 		relArg1.setRole("Arg1");
@@ -208,6 +208,7 @@ public class EventTimeSelfRelationAnnota
 		relation.setArg1(relArg1);
 		relation.setArg2(relArg2);
 		relation.setCategory(predictedCategory);
+		relation.setConfidence(confidence);
 		relation.addToIndexes();
 	}
 

Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java?rev=1688869&view=auto
==============================================================================
--- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java (added)
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java Thu Jul  2 17:41:59 2015
@@ -0,0 +1,331 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.ctakes.temporal.ae;
+
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+
+import org.apache.ctakes.relationextractor.ae.features.DependencyPathFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.DependencyTreeFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.PhraseChunkingExtractor;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation;
+import org.apache.ctakes.typesystem.type.relation.RelationArgument;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.uima.UimaContext;
+import org.apache.uima.UimaContextAdmin;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ConfigurationManager;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.cleartk.ml.CleartkAnnotator;
+import org.cleartk.ml.CleartkProcessingException;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.Instance;
+import org.cleartk.ml.jar.GenericJarClassifierFactory;
+import org.cleartk.util.ViewUriUtil;
+
+import com.google.common.collect.Lists;
+
+public abstract class TemporalRelationExtractorAnnotator extends CleartkAnnotator<String> {
+
+  public static final String NO_RELATION_CATEGORY = "-NONE-";
+
+  public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
+      "ProbabilityOfKeepingANegativeExample";
+
+  @ConfigurationParameter(
+      name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE,
+      mandatory = false,
+      description = "probability that a negative example should be retained for training")
+  protected double probabilityOfKeepingANegativeExample = 1.0;
+
+  protected Random coin = new Random(0);
+
+  private List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> featureExtractors = this.getFeatureExtractors();
+
+  private Class<? extends Annotation> coveringClass = getCoveringClass();
+
+  /**
+   * Defines the list of feature extractors used by the classifier. Subclasses
+   * may override this method to provide a different set of feature extractors.
+   * 
+   * @return The list of feature extractors to use.
+   */
+  protected List<RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation>> getFeatureExtractors() {
+    return Lists.newArrayList(
+        new TokenFeaturesExtractor(),
+        new PartOfSpeechFeaturesExtractor(),
+        new PhraseChunkingExtractor(),
+        new NamedEntityFeaturesExtractor(),
+        new DependencyTreeFeaturesExtractor(),
+        new DependencyPathFeaturesExtractor());
+  }
+
+  protected Class<? extends BinaryTextRelation> getRelationClass() {
+    return BinaryTextRelation.class;
+  }
+
+  /*
+   * Defines the type of annotation that the relation exists within (sentence,
+   * document, segment)
+   */
+  protected abstract Class<? extends Annotation> getCoveringClass();
+
+  /**
+   * Selects the relevant mentions/annotations within a covering annotation for
+   * relation identification/extraction.
+   */
+  protected abstract List<IdentifiedAnnotationPair> getCandidateRelationArgumentPairs(
+      JCas identifiedAnnotationView,
+      Annotation coveringAnnotation);
+
+  /**
+   * Workaround for https://code.google.com/p/cleartk/issues/detail?id=346
+   * 
+   * Not intended for external use
+   */
+  static void allowClassifierModelOnClasspath(UimaContext context) {
+    String modelPathParam = GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH;
+    String modelPath = (String) context.getConfigParameterValue(modelPathParam);
+    if (modelPath != null) {
+      URL modelClasspathURL = TemporalRelationExtractorAnnotator.class.getResource(modelPath);
+      if (modelClasspathURL != null) {
+        UimaContextAdmin contextAdmin = (UimaContextAdmin) context;
+        ConfigurationManager manager = contextAdmin.getConfigurationManager();
+        String qualifiedModelPathParam = contextAdmin.getQualifiedContextName() + modelPathParam;
+        manager.setConfigParameterValue(qualifiedModelPathParam, modelClasspathURL.toString());
+      }
+    }
+  }
+
+  @Override
+  public void initialize(UimaContext context) throws ResourceInitializationException {
+    allowClassifierModelOnClasspath(context);
+    super.initialize(context);
+  }
+
+  /*
+   * Implement the standard UIMA process method.
+   */
+  @Override
+  public void process(JCas jCas) throws AnalysisEngineProcessException {
+
+    // lookup from pair of annotations to binary text relation
+    // note: assumes that there will be at most one relation per pair
+    Map<List<Annotation>, BinaryTextRelation> relationLookup;
+    relationLookup = new HashMap<>();
+    if (this.isTraining()) {
+      relationLookup = new HashMap<>();
+      for (BinaryTextRelation relation : JCasUtil.select(jCas, this.getRelationClass())) {
+        Annotation arg1 = relation.getArg1().getArgument();
+        Annotation arg2 = relation.getArg2().getArgument();
+        // The key is a list of args so we can do bi-directional lookup
+        List<Annotation> key = Arrays.asList(arg1, arg2);
+        if(relationLookup.containsKey(key)){
+         String reln = relationLookup.get(key).getCategory();
+         System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
+         System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText());
+        }
+        relationLookup.put(key, relation);
+      }
+    }
+
+    // walk through each sentence in the text
+    for (Annotation coveringAnnotation : JCasUtil.select(jCas, coveringClass)) {
+
+      // collect all relevant relation arguments from the sentence
+      List<IdentifiedAnnotationPair> candidatePairs =
+          this.getCandidateRelationArgumentPairs(jCas, coveringAnnotation);
+
+      // walk through the pairs of annotations
+      for (IdentifiedAnnotationPair pair : candidatePairs) {
+        IdentifiedAnnotation arg1 = pair.getArg1();
+        IdentifiedAnnotation arg2 = pair.getArg2();
+        // apply all the feature extractors to extract the list of features
+        List<Feature> features = new ArrayList<>();
+        for (RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> extractor : this.featureExtractors) {
+        	 List<Feature> feats = extractor.extract(jCas, arg1, arg2);
+        	 if (feats != null)  features.addAll(feats);
+        }
+
+        // sanity check on feature values
+        for (Feature feature : features) {
+          if (feature.getValue() == null) {
+        	feature.setValue("NULL");
+            String message = String.format("Null value found in %s from %s", feature, features);
+            System.err.println(message);
+//            throw new IllegalArgumentException(String.format(message, feature, features));
+          }
+        }
+
+        // during training, feed the features to the data writer
+        if (this.isTraining()) {
+          String category = this.getRelationCategory(relationLookup, arg1, arg2);
+          if (category == null) {
+            continue;
+          }
+
+          // create a classification instance and write it to the training data
+          this.dataWriter.write(new Instance<>(category, features));
+        }
+
+        // during classification feed the features to the classifier and create
+        // annotations
+        else {
+//          String predictedCategory = this.classify(features);
+          
+          Map.Entry<String, Double> maxEntry = null;
+          for( Map.Entry<String, Double> entry: this.classifier.score(features).entrySet() ){
+          	if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){
+          		maxEntry = entry;
+          	}
+          }
+          
+          String predictedCategory = null;
+          double confidence = 0d;
+          if(maxEntry != null){
+        	  predictedCategory = maxEntry.getKey();
+        	  confidence = maxEntry.getValue().doubleValue();
+          }
+          
+          // add a relation annotation if a true relation was predicted
+          if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
+
+            // if we predict an inverted relation, reverse the order of the
+            // arguments
+            if (predictedCategory.endsWith("-1")) {
+              predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2);
+              IdentifiedAnnotation temp = arg1;
+              arg1 = arg2;
+              arg2 = temp;
+            }
+
+            createRelation(jCas, arg1, arg2, predictedCategory, confidence);
+          }
+        }
+      } // end pair in pairs
+    } // end for(Sentence)
+  }
+
+  /**
+   * Looks up the arguments in the specified lookup table and converts the
+   * relation into a label for classification
+   * 
+   * @return If this category should not be processed for training return
+   *         <i>null</i> otherwise it returns the label sent to the datawriter
+   */
+  protected String getRelationCategory(
+      Map<List<Annotation>, BinaryTextRelation> relationLookup,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2) {
+    BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2));
+    String category;
+    if (relation != null) {
+      category = relation.getCategory();
+    } else if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) {
+      category = NO_RELATION_CATEGORY;
+    } else {
+      category = null;
+    }
+    return category;
+  }
+
+  /**
+   * Predict an outcome given a set of features. By default, this simply
+   * delegates to the object's <code>classifier</code>. Subclasses may override
+   * this method to implement more complex classification procedures.
+   * 
+   * @param features
+   *          The features to be classified.
+   * @return The predicted outcome (label) for the features.
+   */
+  protected String classify(List<Feature> features) throws CleartkProcessingException {
+    return this.classifier.classify(features);
+  }
+
+  /**
+   * Create a UIMA relation type based on arguments and the relation label. This
+   * allows subclasses to create/define their own types: e.g. coreference can
+   * create CoreferenceRelation instead of BinaryTextRelation
+   * 
+   * @param jCas
+   *          - JCas object, needed to create new UIMA types
+   * @param arg1
+   *          - First argument to relation
+   * @param arg2
+   *          - Second argument to relation
+   * @param predictedCategory
+   *          - Name of relation
+   * @param confidence 
+   * 		  - Confidence score of the relation prediction
+   */
+  protected void createRelation(
+      JCas jCas,
+      IdentifiedAnnotation arg1,
+      IdentifiedAnnotation arg2,
+      String predictedCategory, 
+      double confidence) {
+    // add the relation to the CAS
+    RelationArgument relArg1 = new RelationArgument(jCas);
+    relArg1.setArgument(arg1);
+    relArg1.setRole("Argument");
+    relArg1.addToIndexes();
+    RelationArgument relArg2 = new RelationArgument(jCas);
+    relArg2.setArgument(arg2);
+    relArg2.setRole("Related_to");
+    relArg2.addToIndexes();
+    BinaryTextRelation relation = new BinaryTextRelation(jCas);
+    relation.setArg1(relArg1);
+    relation.setArg2(relArg2);
+    relation.setCategory(predictedCategory);
+    relation.setConfidence(confidence);
+    relation.addToIndexes();
+  }
+
+  public static class IdentifiedAnnotationPair {
+
+    private final IdentifiedAnnotation arg1;
+    private final IdentifiedAnnotation arg2;
+
+    public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) {
+      this.arg1 = arg1;
+      this.arg2 = arg2;
+    }
+
+    public final IdentifiedAnnotation getArg1() {
+      return arg1;
+    }
+
+    public final IdentifiedAnnotation getArg2() {
+      return arg2;
+    }
+  }
+}

Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain



Mime
View raw message