Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A6DC81890E for ; Thu, 2 Jul 2015 17:42:00 +0000 (UTC) Received: (qmail 90144 invoked by uid 500); 2 Jul 2015 17:42:00 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 90108 invoked by uid 500); 2 Jul 2015 17:42:00 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 90099 invoked by uid 99); 2 Jul 2015 17:42:00 -0000 Received: from eris.apache.org (HELO hades.apache.org) (140.211.11.105) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 02 Jul 2015 17:42:00 +0000 Received: from hades.apache.org (localhost [127.0.0.1]) by hades.apache.org (ASF Mail Server at hades.apache.org) with ESMTP id 3F1D1AC013F for ; Thu, 2 Jul 2015 17:42:00 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1688869 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae: DocTimeRelAnnotator.java EventAnnotator.java EventEventRelationAnnotator.java EventTimeSelfRelationAnnotator.java TemporalRelationExtractorAnnotator.java Date: Thu, 02 Jul 2015 17:41:59 -0000 To: commits@ctakes.apache.org From: clin@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20150702174200.3F1D1AC013F@hades.apache.org> Author: clin Date: Thu Jul 2 17:41:59 2015 New Revision: 1688869 URL: http://svn.apache.org/r1688869 Log: add confidence scores for DocTimeRel, Event, and temporal relation predictions Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java (with props) Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelAnnotator.java Thu Jul 2 17:41:59 2015 @@ -26,6 +26,8 @@ import java.util.List; +import java.util.Map; + import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor; //import org.apache.ctakes.temporal.ae.feature.CoveredTextToValuesExtractor; import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor; @@ -171,17 +173,28 @@ public class DocTimeRelAnnotator extends if (this.isTraining()) { if(eventMention.getEvent() != null){ String outcome = eventMention.getEvent().getProperties().getDocTimeRel(); - this.dataWriter.write(new Instance(outcome, features)); + this.dataWriter.write(new Instance<>(outcome, features)); } } else { - String outcome = this.classifier.classify(features); +// String outcome = this.classifier.classify(features); + Map.Entry maxEntry = null; + for( Map.Entry entry: this.classifier.score(features).entrySet() ){ + if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){ + maxEntry = entry; + } + } + if (eventMention.getEvent() == null) { Event event = new Event(jCas); eventMention.setEvent(event); EventProperties props = new EventProperties(jCas); event.setProperties(props); } - eventMention.getEvent().getProperties().setDocTimeRel(outcome); + if( maxEntry != null){ + eventMention.getEvent().getProperties().setDocTimeRel(maxEntry.getKey()); + eventMention.getEvent().setConfidence(maxEntry.getValue().floatValue()); + System.out.println("event confidence:"+maxEntry.getValue().floatValue()); + } } } } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventAnnotator.java Thu Jul 2 17:41:59 2015 @@ -24,6 +24,7 @@ import java.io.IOException; import java.net.URI; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Random; import org.apache.ctakes.temporal.ae.feature.ChunkingExtractor; @@ -252,6 +253,7 @@ public class EventAnnotator extends Temp // during training, the list of all outcomes for the tokens List outcomes; + List confidenceScores= new ArrayList<>(); if (this.isTraining()) { List events = Lists.newArrayList(); for (EventMention event : JCasUtil.selectCovered(jCas, EventMention.class, sentence)) { @@ -354,14 +356,27 @@ public class EventAnnotator extends Temp // if predicting, add prediction to outcomes else { - outcomes.add(this.classifier.classify(features)); +// outcomes.add(this.classifier.classify(features)); + + Map.Entry maxEntry = null; + for( Map.Entry entry: this.classifier.score(features).entrySet() ){ + if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){ + maxEntry = entry; + } + } + + outcomes.add(maxEntry.getKey()); + confidenceScores.add(maxEntry.getValue()); } } // during prediction, convert chunk labels to events and add them to the CAS if (!this.isTraining()) { List createdEvents = this.eventChunking.createChunks(jCas, tokens, outcomes); + int mentionidx =0; for(EventMention mention : createdEvents){ + mention.setConfidence(confidenceScores.get(mentionidx).floatValue()); + mentionidx++; if(mention.getEvent() == null){ Event event = new Event(jCas); EventProperties props = new EventProperties(jCas); Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventEventRelationAnnotator.java Thu Jul 2 17:41:59 2015 @@ -81,7 +81,7 @@ import org.apache.uima.fit.util.JCasUtil import com.google.common.collect.Lists; -public class EventEventRelationAnnotator extends RelationExtractorAnnotator { +public class EventEventRelationAnnotator extends TemporalRelationExtractorAnnotator { public static AnalysisEngineDescription createDataWriterDescription( Class> dataWriterClass, @@ -302,7 +302,7 @@ public class EventEventRelationAnnotator @Override protected void createRelation(JCas jCas, IdentifiedAnnotation arg1, - IdentifiedAnnotation arg2, String predictedCategory) { + IdentifiedAnnotation arg2, String predictedCategory, double confidence) { RelationArgument relArg1 = new RelationArgument(jCas); relArg1.setArgument(arg1); relArg1.setRole("Arg1"); @@ -315,6 +315,7 @@ public class EventEventRelationAnnotator relation.setArg1(relArg1); relation.setArg2(relArg2); relation.setCategory(predictedCategory); + relation.setConfidence(confidence); relation.addToIndexes(); } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java?rev=1688869&r1=1688868&r2=1688869&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/EventTimeSelfRelationAnnotator.java Thu Jul 2 17:41:59 2015 @@ -77,7 +77,7 @@ import org.apache.uima.fit.util.JCasUtil import com.google.common.collect.Lists; -public class EventTimeSelfRelationAnnotator extends RelationExtractorAnnotator { +public class EventTimeSelfRelationAnnotator extends TemporalRelationExtractorAnnotator { public static AnalysisEngineDescription createDataWriterDescription( Class> dataWriterClass, @@ -195,7 +195,7 @@ public class EventTimeSelfRelationAnnota @Override protected void createRelation(JCas jCas, IdentifiedAnnotation arg1, - IdentifiedAnnotation arg2, String predictedCategory) { + IdentifiedAnnotation arg2, String predictedCategory, double confidence) { RelationArgument relArg1 = new RelationArgument(jCas); relArg1.setArgument(arg1); relArg1.setRole("Arg1"); @@ -208,6 +208,7 @@ public class EventTimeSelfRelationAnnota relation.setArg1(relArg1); relation.setArg2(relArg2); relation.setCategory(predictedCategory); + relation.setConfidence(confidence); relation.addToIndexes(); } Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java?rev=1688869&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java Thu Jul 2 17:41:59 2015 @@ -0,0 +1,331 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.ae; + +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.ctakes.relationextractor.ae.features.DependencyPathFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.DependencyTreeFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.NamedEntityFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.PartOfSpeechFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.PhraseChunkingExtractor; +import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor; +import org.apache.ctakes.relationextractor.ae.features.TokenFeaturesExtractor; +import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; +import org.apache.ctakes.typesystem.type.relation.RelationArgument; +import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; +import org.apache.uima.UimaContext; +import org.apache.uima.UimaContextAdmin; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.fit.descriptor.ConfigurationParameter; +import org.apache.uima.fit.util.JCasUtil; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.resource.ConfigurationManager; +import org.apache.uima.resource.ResourceInitializationException; +import org.cleartk.ml.CleartkAnnotator; +import org.cleartk.ml.CleartkProcessingException; +import org.cleartk.ml.Feature; +import org.cleartk.ml.Instance; +import org.cleartk.ml.jar.GenericJarClassifierFactory; +import org.cleartk.util.ViewUriUtil; + +import com.google.common.collect.Lists; + +public abstract class TemporalRelationExtractorAnnotator extends CleartkAnnotator { + + public static final String NO_RELATION_CATEGORY = "-NONE-"; + + public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE = + "ProbabilityOfKeepingANegativeExample"; + + @ConfigurationParameter( + name = PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE, + mandatory = false, + description = "probability that a negative example should be retained for training") + protected double probabilityOfKeepingANegativeExample = 1.0; + + protected Random coin = new Random(0); + + private List> featureExtractors = this.getFeatureExtractors(); + + private Class coveringClass = getCoveringClass(); + + /** + * Defines the list of feature extractors used by the classifier. Subclasses + * may override this method to provide a different set of feature extractors. + * + * @return The list of feature extractors to use. + */ + protected List> getFeatureExtractors() { + return Lists.newArrayList( + new TokenFeaturesExtractor(), + new PartOfSpeechFeaturesExtractor(), + new PhraseChunkingExtractor(), + new NamedEntityFeaturesExtractor(), + new DependencyTreeFeaturesExtractor(), + new DependencyPathFeaturesExtractor()); + } + + protected Class getRelationClass() { + return BinaryTextRelation.class; + } + + /* + * Defines the type of annotation that the relation exists within (sentence, + * document, segment) + */ + protected abstract Class getCoveringClass(); + + /** + * Selects the relevant mentions/annotations within a covering annotation for + * relation identification/extraction. + */ + protected abstract List getCandidateRelationArgumentPairs( + JCas identifiedAnnotationView, + Annotation coveringAnnotation); + + /** + * Workaround for https://code.google.com/p/cleartk/issues/detail?id=346 + * + * Not intended for external use + */ + static void allowClassifierModelOnClasspath(UimaContext context) { + String modelPathParam = GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH; + String modelPath = (String) context.getConfigParameterValue(modelPathParam); + if (modelPath != null) { + URL modelClasspathURL = TemporalRelationExtractorAnnotator.class.getResource(modelPath); + if (modelClasspathURL != null) { + UimaContextAdmin contextAdmin = (UimaContextAdmin) context; + ConfigurationManager manager = contextAdmin.getConfigurationManager(); + String qualifiedModelPathParam = contextAdmin.getQualifiedContextName() + modelPathParam; + manager.setConfigParameterValue(qualifiedModelPathParam, modelClasspathURL.toString()); + } + } + } + + @Override + public void initialize(UimaContext context) throws ResourceInitializationException { + allowClassifierModelOnClasspath(context); + super.initialize(context); + } + + /* + * Implement the standard UIMA process method. + */ + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + + // lookup from pair of annotations to binary text relation + // note: assumes that there will be at most one relation per pair + Map, BinaryTextRelation> relationLookup; + relationLookup = new HashMap<>(); + if (this.isTraining()) { + relationLookup = new HashMap<>(); + for (BinaryTextRelation relation : JCasUtil.select(jCas, this.getRelationClass())) { + Annotation arg1 = relation.getArg1().getArgument(); + Annotation arg2 = relation.getArg2().getArgument(); + // The key is a list of args so we can do bi-directional lookup + List key = Arrays.asList(arg1, arg2); + if(relationLookup.containsKey(key)){ + String reln = relationLookup.get(key).getCategory(); + System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString()); + System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + reln + " at this span: " + arg1.getCoveredText() + " -- " + arg2.getCoveredText()); + } + relationLookup.put(key, relation); + } + } + + // walk through each sentence in the text + for (Annotation coveringAnnotation : JCasUtil.select(jCas, coveringClass)) { + + // collect all relevant relation arguments from the sentence + List candidatePairs = + this.getCandidateRelationArgumentPairs(jCas, coveringAnnotation); + + // walk through the pairs of annotations + for (IdentifiedAnnotationPair pair : candidatePairs) { + IdentifiedAnnotation arg1 = pair.getArg1(); + IdentifiedAnnotation arg2 = pair.getArg2(); + // apply all the feature extractors to extract the list of features + List features = new ArrayList<>(); + for (RelationFeaturesExtractor extractor : this.featureExtractors) { + List feats = extractor.extract(jCas, arg1, arg2); + if (feats != null) features.addAll(feats); + } + + // sanity check on feature values + for (Feature feature : features) { + if (feature.getValue() == null) { + feature.setValue("NULL"); + String message = String.format("Null value found in %s from %s", feature, features); + System.err.println(message); +// throw new IllegalArgumentException(String.format(message, feature, features)); + } + } + + // during training, feed the features to the data writer + if (this.isTraining()) { + String category = this.getRelationCategory(relationLookup, arg1, arg2); + if (category == null) { + continue; + } + + // create a classification instance and write it to the training data + this.dataWriter.write(new Instance<>(category, features)); + } + + // during classification feed the features to the classifier and create + // annotations + else { +// String predictedCategory = this.classify(features); + + Map.Entry maxEntry = null; + for( Map.Entry entry: this.classifier.score(features).entrySet() ){ + if(maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){ + maxEntry = entry; + } + } + + String predictedCategory = null; + double confidence = 0d; + if(maxEntry != null){ + predictedCategory = maxEntry.getKey(); + confidence = maxEntry.getValue().doubleValue(); + } + + // add a relation annotation if a true relation was predicted + if (!predictedCategory.equals(NO_RELATION_CATEGORY)) { + + // if we predict an inverted relation, reverse the order of the + // arguments + if (predictedCategory.endsWith("-1")) { + predictedCategory = predictedCategory.substring(0, predictedCategory.length() - 2); + IdentifiedAnnotation temp = arg1; + arg1 = arg2; + arg2 = temp; + } + + createRelation(jCas, arg1, arg2, predictedCategory, confidence); + } + } + } // end pair in pairs + } // end for(Sentence) + } + + /** + * Looks up the arguments in the specified lookup table and converts the + * relation into a label for classification + * + * @return If this category should not be processed for training return + * null otherwise it returns the label sent to the datawriter + */ + protected String getRelationCategory( + Map, BinaryTextRelation> relationLookup, + IdentifiedAnnotation arg1, + IdentifiedAnnotation arg2) { + BinaryTextRelation relation = relationLookup.get(Arrays.asList(arg1, arg2)); + String category; + if (relation != null) { + category = relation.getCategory(); + } else if (coin.nextDouble() <= this.probabilityOfKeepingANegativeExample) { + category = NO_RELATION_CATEGORY; + } else { + category = null; + } + return category; + } + + /** + * Predict an outcome given a set of features. By default, this simply + * delegates to the object's classifier. Subclasses may override + * this method to implement more complex classification procedures. + * + * @param features + * The features to be classified. + * @return The predicted outcome (label) for the features. + */ + protected String classify(List features) throws CleartkProcessingException { + return this.classifier.classify(features); + } + + /** + * Create a UIMA relation type based on arguments and the relation label. This + * allows subclasses to create/define their own types: e.g. coreference can + * create CoreferenceRelation instead of BinaryTextRelation + * + * @param jCas + * - JCas object, needed to create new UIMA types + * @param arg1 + * - First argument to relation + * @param arg2 + * - Second argument to relation + * @param predictedCategory + * - Name of relation + * @param confidence + * - Confidence score of the relation prediction + */ + protected void createRelation( + JCas jCas, + IdentifiedAnnotation arg1, + IdentifiedAnnotation arg2, + String predictedCategory, + double confidence) { + // add the relation to the CAS + RelationArgument relArg1 = new RelationArgument(jCas); + relArg1.setArgument(arg1); + relArg1.setRole("Argument"); + relArg1.addToIndexes(); + RelationArgument relArg2 = new RelationArgument(jCas); + relArg2.setArgument(arg2); + relArg2.setRole("Related_to"); + relArg2.addToIndexes(); + BinaryTextRelation relation = new BinaryTextRelation(jCas); + relation.setArg1(relArg1); + relation.setArg2(relArg2); + relation.setCategory(predictedCategory); + relation.setConfidence(confidence); + relation.addToIndexes(); + } + + public static class IdentifiedAnnotationPair { + + private final IdentifiedAnnotation arg1; + private final IdentifiedAnnotation arg2; + + public IdentifiedAnnotationPair(IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) { + this.arg1 = arg1; + this.arg2 = arg2; + } + + public final IdentifiedAnnotation getArg1() { + return arg1; + } + + public final IdentifiedAnnotation getArg2() { + return arg2; + } + } +} Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/TemporalRelationExtractorAnnotator.java ------------------------------------------------------------------------------ svn:mime-type = text/plain