Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2ECAC10241 for ; Tue, 30 Jul 2013 13:38:04 +0000 (UTC) Received: (qmail 72779 invoked by uid 500); 30 Jul 2013 13:38:04 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 72741 invoked by uid 500); 30 Jul 2013 13:38:01 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 72734 invoked by uid 99); 30 Jul 2013 13:38:00 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 30 Jul 2013 13:38:00 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 30 Jul 2013 13:37:53 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id B6CDD238896F; Tue, 30 Jul 2013 13:37:31 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1508435 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion: attributes/features/selection/ eval/ medfacts/cleartk/ train/ Date: Tue, 30 Jul 2013 13:37:31 -0000 To: commits@ctakes.apache.org From: swu@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130730133731.B6CDD238896F@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: swu Date: Tue Jul 30 13:37:30 2013 New Revision: 1508435 URL: http://svn.apache.org/r1508435 Log: ctakes-assertion: more modifications to improve evaluation and logging for feature selection tests Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java (with props) Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/selection/Chi2FeatureSelection.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/selection/Chi2FeatureSelection.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/selection/Chi2FeatureSelection.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/selection/Chi2FeatureSelection.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/attributes/features/selection/Chi2FeatureSelection.java Tue Jul 30 13:37:30 2013 @@ -38,9 +38,12 @@ public class Chi2FeatureSelection featValueClassCount; - public Chi2Scorer() { + private boolean yates = false; + + public Chi2Scorer(boolean yate) { this.classCounts = HashMultiset. create(); this.featValueClassCount = HashBasedTable. create(); + this.yates = yate; } public void update(String featureName, OUTCOME_T outcome, int occurrences) { @@ -88,13 +91,12 @@ public class Chi2FeatureSelection 0) { double diff = Math.abs(posiOutcomeCounts[lbl] - expected); - if (yates) { // apply Yate's correction + if (this.yates ) { // apply Yate's correction diff -= 0.5; } if (diff > 0) @@ -106,7 +108,7 @@ public class Chi2FeatureSelection 0) { double diff = Math.abs(observ - expected); - if (yates) { // apply Yate's correction + if (this.yates) { // apply Yate's correction diff -= 0.5; } if (diff > 0) @@ -121,6 +123,8 @@ public class Chi2FeatureSelection chi2Function; + + private boolean yates = false; public Chi2FeatureSelection(String name) { this(name, 0.0); @@ -131,6 +135,17 @@ public class Chi2FeatureSelection> instances) { // aggregate statistics for all features - this.chi2Function = new Chi2Scorer(); + this.chi2Function = new Chi2Scorer(this.yates); for (Instance instance : instances) { OUTCOME_T outcome = instance.getOutcome(); for (Feature feature : instance.getFeatures()) { Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java?rev=1508435&view=auto ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java (added) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java Tue Jul 30 13:37:30 2013 @@ -0,0 +1,494 @@ +/* + * Copyright (c) 2012, Regents of the University of Colorado + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package org.apache.ctakes.assertion.eval; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.uima.cas.Feature; +import org.apache.uima.jcas.cas.TOP; +import org.apache.uima.jcas.tcas.Annotation; +import org.cleartk.eval.util.ConfusionMatrix; + +import com.google.common.base.Function; +import com.google.common.base.Objects; +import com.google.common.base.Objects.ToStringHelper; +import com.google.common.collect.HashMultiset; +import com.google.common.collect.Multiset; + +/** + * Stores statistics for comparing {@link Annotation}s extracted by a system to gold + * {@link Annotation}s. + * + *
+ * Copyright (c) 2012, Regents of the University of Colorado
+ * All rights reserved. + * + * @author Steven Bethard + */ +public class AnnotationStatisticsCompact> implements + Serializable { + + private static final long serialVersionUID = 1L; + + private Multiset referenceOutcomes; + + private Multiset predictedOutcomes; + + private Multiset correctOutcomes; + + private ConfusionMatrix confusionMatrix; + + /** + * Creates a {@link Function} that converts an {@link Annotation} into a hashable representation + * of its begin and end offsets. + * + * The {@link Function} created by this method is suitable for passing to the first + * {@link Function} argument of {@link #add(Collection, Collection, Function, Function)}. + */ + public static Function annotationToSpan() { + return new Function() { + @Override + public Span apply(ANNOTATION_TYPE annotation) { + return new Span(annotation); + } + }; + } + + /** + * Creates a {@link Function} that extracts a feature value from a {@link TOP}. + * + * The {@link Function} created by this method is suitable for passing to the second + * {@link Function} argument of {@link #add(Collection, Collection, Function, Function)}. + * + * @param featureName + * The name of the feature whose value is to be extracted. + */ + public static Function annotationToFeatureValue( + final String featureName) { + return new Function() { + @Override + public String apply(ANNOTATION_TYPE annotation) { + Feature feature = annotation.getType().getFeatureByBaseName(featureName); + return annotation.getFeatureValueAsString(feature); + } + }; + } + + /** + * Creates a {@link Function} that always returns null. + * + * This may be useful when only the span of the offset is important, but you still need to pass in + * the final argument of {@link #add(Collection, Collection, Function, Function)}. + */ + public static Function annotationToNull() { + return new Function() { + @Override + public OUTCOME_TYPE apply(ANNOTATION_TYPE annotation) { + return null; + } + }; + } + + /** + * Add all statistics together. + * + * This is often useful for combining individual fold statistics that result from methods like + * {@link Evaluation_ImplBase#crossValidation(List, int)}. + * + * @param statistics + * The sequence of statistics that should be combined. + * @return The combination of all the individual statistics. + */ + public static > AnnotationStatisticsCompact addAll( + Iterable> statistics) { + AnnotationStatisticsCompact result = new AnnotationStatisticsCompact(); + for (AnnotationStatisticsCompact item : statistics) { + result.addAll(item); + } + return result; + } + + /** + * Create an AnnotationStatisticsCompact that compares {@link Annotation}s based on their begin and end + * offsets, plus a {@link Feature} of the {@link Annotation} that represents the outcome or label. + */ + public AnnotationStatisticsCompact() { + this.referenceOutcomes = HashMultiset.create(); + this.predictedOutcomes = HashMultiset.create(); + this.correctOutcomes = HashMultiset.create(); + this.confusionMatrix = new ConfusionMatrix(); + } + + /** + * Update the statistics, comparing the reference annotations to the predicted annotations. + * + * Annotations are considered to match if they have the same character offsets in the text. All + * outcomes (e.g. as returned in {@link #confusions()}) will be null. + * + * @param referenceAnnotations + * The reference annotations, typically identified by humans. + * @param predictedAnnotations + * The predicted annotations, typically identified by a model. + */ + public void add( + Collection referenceAnnotations, + Collection predictedAnnotations) { + this.add( + referenceAnnotations, + predictedAnnotations, + AnnotationStatisticsCompact. annotationToSpan(), + AnnotationStatisticsCompact. annotationToNull()); + } + + /** + * Update the statistics, comparing the reference annotations to the predicted annotations. + * + * Annotations are considered to match if they have the same span (according to + * {@code annotationToSpan}) and if they have the same outcome (according to + * {@code annotationToOutcome}). + * + * @param referenceAnnotations + * The reference annotations, typically identified by humans. + * @param predictedAnnotations + * The predicted annotations, typically identified by a model. + * @param annotationToSpan + * A function that defines how to convert an annotation into a hashable object that + * represents the span of that annotation. The {@link #annotationToSpan()} method + * provides an example function that could be used here. + * @param annotationToOutcome + * A function that defines how to convert an annotation into an object that represents + * the outcome (or "label") assigned to that annotation. The + * {@link #annotationToFeatureValue(String)} method provides a sample function that could + * be used here. + */ + public void add( + Collection referenceAnnotations, + Collection predictedAnnotations, + Function annotationToSpan, + Function annotationToOutcome) { + + // map gold spans to their outcomes + Map referenceSpanOutcomes = new HashMap(); + for (ANNOTATION_TYPE ann : referenceAnnotations) { + referenceSpanOutcomes.put(annotationToSpan.apply(ann), annotationToOutcome.apply(ann)); + } + + // map system spans to their outcomes + Map predictedSpanOutcomes = new HashMap(); + for (ANNOTATION_TYPE ann : predictedAnnotations) { + predictedSpanOutcomes.put(annotationToSpan.apply(ann), annotationToOutcome.apply(ann)); + } + + // update the gold and system outcomes + this.referenceOutcomes.addAll(referenceSpanOutcomes.values()); + this.predictedOutcomes.addAll(predictedSpanOutcomes.values()); + + // determine the outcomes that were correct + Set intersection = new HashSet(); + intersection.addAll(referenceSpanOutcomes.keySet()); + intersection.retainAll(predictedSpanOutcomes.keySet()); + for (SPAN_TYPE span : intersection) { + OUTCOME_TYPE goldOutcome = referenceSpanOutcomes.get(span); + OUTCOME_TYPE systemOutcome = predictedSpanOutcomes.get(span); + if (Objects.equal(goldOutcome, systemOutcome)) { + this.correctOutcomes.add(goldOutcome); + } + } + + // update the confusion matrix + Set union = new HashSet(); + union.addAll(referenceSpanOutcomes.keySet()); + union.addAll(predictedSpanOutcomes.keySet()); + for (SPAN_TYPE span : union) { + OUTCOME_TYPE goldOutcome = referenceSpanOutcomes.get(span); + OUTCOME_TYPE systemOutcome = predictedSpanOutcomes.get(span); + this.confusionMatrix.add(goldOutcome, systemOutcome); + } + } + + /** + * Adds all the statistics collected by another AnnotationStatisticsCompact to this one. + * + * @param that + * The other statistics that should be added to this one. + */ + public void addAll(AnnotationStatisticsCompact that) { + this.referenceOutcomes.addAll(that.referenceOutcomes); + this.predictedOutcomes.addAll(that.predictedOutcomes); + this.correctOutcomes.addAll(that.correctOutcomes); + this.confusionMatrix.add(that.confusionMatrix); + } + + public int countCorrectOutcomes() { + return this.correctOutcomes.size(); + } + + public int countCorrectOutcomes(OUTCOME_TYPE outcome) { + return this.correctOutcomes.count(outcome); + } + + public int countPredictedOutcomes() { + return this.predictedOutcomes.size(); + } + + public int countPredictedOutcomes(OUTCOME_TYPE outcome) { + return this.predictedOutcomes.count(outcome); + } + + public int countReferenceOutcomes() { + return this.referenceOutcomes.size(); + } + + public int countReferenceOutcomes(OUTCOME_TYPE outcome) { + return this.referenceOutcomes.count(outcome); + } + + public int countFalseNegatives(OUTCOME_TYPE... positiveOutcomes) { + int numReferenceOutcomes = this.countReferenceOutcomes(); + int numPredictedOutcomes = this.countPredictedOutcomes(); + if (numReferenceOutcomes != numPredictedOutcomes) { + throw new IllegalStateException( + String.format( + "Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d", + numReferenceOutcomes, + numPredictedOutcomes, + this.countPredictedOutcomes())); + } + int totalFalseNegatives = 0; + for (OUTCOME_TYPE positiveOutcome : positiveOutcomes) { + totalFalseNegatives += this.countReferenceOutcomes(positiveOutcome) + - this.countCorrectOutcomes(positiveOutcome); + } + return totalFalseNegatives; + } + + public int countFalsePositives(OUTCOME_TYPE... positiveOutcomes) { + int numReferenceOutcomes = this.countReferenceOutcomes(); + int numPredictedOutcomes = this.countPredictedOutcomes(); + if (numReferenceOutcomes != numPredictedOutcomes) { + throw new IllegalStateException( + String.format( + "Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d", + numReferenceOutcomes, + numPredictedOutcomes, + this.countPredictedOutcomes())); + } + int totalFalsePositives = 0; + for (OUTCOME_TYPE positiveOutcome : positiveOutcomes) { + totalFalsePositives += this.countPredictedOutcomes(positiveOutcome) + - this.countCorrectOutcomes(positiveOutcome); + } + + return totalFalsePositives; + } + + public int countTrueNegatives(OUTCOME_TYPE... positiveOutcomes) { + int numReferenceOutcomes = this.countReferenceOutcomes(); + int numPredictedOutcomes = this.countPredictedOutcomes(); + if (numReferenceOutcomes != numPredictedOutcomes) { + throw new IllegalStateException( + String.format( + "Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d", + numReferenceOutcomes, + numPredictedOutcomes, + this.countPredictedOutcomes())); + } + int totalTrueNegatives = this.countCorrectOutcomes(); + + for (OUTCOME_TYPE positiveOutcome : positiveOutcomes) { + totalTrueNegatives -= this.countCorrectOutcomes(positiveOutcome); + } + + return totalTrueNegatives; + + } + + public int countTruePositives(OUTCOME_TYPE... positiveOutcomes) { + int numReferenceOutcomes = this.countReferenceOutcomes(); + int numPredictedOutcomes = this.countPredictedOutcomes(); + if (numReferenceOutcomes != numPredictedOutcomes) { + throw new IllegalStateException( + String.format( + "Expected number equal number of references outcomes and predicted outcomes. Had reference outcomes=%d, predicted outcomes=%d", + numReferenceOutcomes, + numPredictedOutcomes, + this.countPredictedOutcomes())); + } + + int totalTruePositives = 0; + for (OUTCOME_TYPE positiveOutcome : positiveOutcomes) { + totalTruePositives += this.countCorrectOutcomes(positiveOutcome); + } + return totalTruePositives; + } + + /** + * Returns the {@link ConfusionMatrix} tabulating reference outcomes matched to predicted + * outcomes. + * + * @return The confusion matrix. + */ + public ConfusionMatrix confusions() { + return this.confusionMatrix; + } + + public double precision() { + int nSystem = this.countPredictedOutcomes(); + return nSystem == 0 ? 1.0 : ((double) this.countCorrectOutcomes()) / nSystem; + } + + public double precision(OUTCOME_TYPE outcome) { + int nSystem = this.countPredictedOutcomes(outcome); + return nSystem == 0 ? 1.0 : ((double) this.countCorrectOutcomes(outcome)) / nSystem; + } + + public double recall() { + int nGold = this.countReferenceOutcomes(); + return nGold == 0 ? 1.0 : ((double) this.countCorrectOutcomes()) / nGold; + } + + public double recall(OUTCOME_TYPE outcome) { + int nGold = this.countReferenceOutcomes(outcome); + return nGold == 0 ? 1.0 : ((double) this.countCorrectOutcomes(outcome)) / nGold; + } + + public double f(double beta) { + double p = this.precision(); + double r = this.recall(); + double num = (1 + beta * beta) * p * r; + double den = (beta * beta * p) + r; + return den == 0.0 ? 0.0 : num / den; + } + + public double f(double beta, OUTCOME_TYPE outcome) { + double p = this.precision(outcome); + double r = this.recall(outcome); + double num = (1 + beta * beta) * p * r; + double den = (beta * beta * p) + r; + return den == 0.0 ? 0.0 : num / den; + } + + public double f1() { + return this.f(1.0); + } + + public double f1(OUTCOME_TYPE outcome) { + return f(1.0, outcome); + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append("P\tR\tF1\t#gold\t#system\t#correct\n"); + result.append(String.format( + "%.3f\t%.3f\t%.3f\t%d\t%d\t%d\tOVERALL\n", + this.precision(), + this.recall(), + this.f1(), + this.referenceOutcomes.size(), + this.predictedOutcomes.size(), + this.correctOutcomes.size())); + List outcomes = new ArrayList(this.referenceOutcomes.elementSet()); + if (outcomes.size() > 1) { + Collections.sort(outcomes); + for (OUTCOME_TYPE outcome : outcomes) { + result.append(String.format( + "%.3f\t%.3f\t%.3f\t%d\t%d\t%d\t%s\n", + this.precision(outcome), + this.recall(outcome), + this.f1(outcome), + this.referenceOutcomes.count(outcome), + this.predictedOutcomes.count(outcome), + this.correctOutcomes.count(outcome), + outcome)); + } + } + return result.toString(); + } + + public String toTsv() { + StringBuilder result = new StringBuilder(); + result.append(String.format( + "%s:%.3f\t%s:%.3f\t%s:%.3f\t", + "All",this.precision(), + "All",this.recall(), + "All",this.f1())); + List outcomes = new ArrayList(this.referenceOutcomes.elementSet()); + if (outcomes.size() > 1) { + Collections.sort(outcomes); + for (OUTCOME_TYPE outcome : outcomes) { + result.append(String.format( + "%s:%.3f\t%s:%.3f\t%s:%.3f\t", + outcome,this.precision(outcome), + outcome,this.recall(outcome), + outcome,this.f1(outcome))); + } + } + result.append("\n"); + return result.toString(); +} + + + private static class Span { + + public int end; + + public int begin; + + public Span(Annotation annotation) { + this.begin = annotation.getBegin(); + this.end = annotation.getEnd(); + } + + @Override + public int hashCode() { + return Objects.hashCode(this.begin, this.end); + } + + @Override + public boolean equals(Object obj) { + if (!this.getClass().equals(obj.getClass())) { + return false; + } + Span that = (Span) obj; + return this.begin == that.begin && this.end == that.end; + } + + @Override + public String toString() { + ToStringHelper helper = Objects.toStringHelper(this); + helper.add("begin", this.begin); + helper.add("end", this.end); + return helper.toString(); + } + } +} Propchange: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AnnotationStatisticsCompact.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/eval/AssertionEvaluation.java Tue Jul 30 13:37:30 2013 @@ -87,7 +87,6 @@ import org.cleartk.classifier.jar.Direct import org.cleartk.classifier.jar.GenericJarClassifierFactory; import org.cleartk.classifier.jar.JarClassifierBuilder; import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter; -import org.cleartk.eval.AnnotationStatistics; import org.cleartk.eval.Evaluation_ImplBase; import org.cleartk.util.Options_ImplBase; import org.kohsuke.args4j.Option; @@ -113,7 +112,7 @@ import com.google.common.collect.Sets; //import org.chboston.cnlp.ctakes.relationextractor.eval.RelationExtractorEvaluation; //import org.chboston.cnlp.ctakes.relationextractor.ae.ModifierExtractorAnnotator; -public class AssertionEvaluation extends Evaluation_ImplBase> { +public class AssertionEvaluation extends Evaluation_ImplBase> { private static Logger logger = Logger.getLogger(AssertionEvaluation.class); @@ -352,20 +351,20 @@ private static Logger logger = Logger.ge // run cross-validation else if(options.crossValidationFolds != null) { // run n-fold cross-validation - List> foldStats = evaluation.crossValidation(trainFiles, options.crossValidationFolds); - //AnnotationStatistics overallStats = AnnotationStatistics.addAll(foldStats); - Map overallStats = new TreeMap(); + List> foldStats = evaluation.crossValidation(trainFiles, options.crossValidationFolds); + //AnnotationStatisticsCompact overallStats = AnnotationStatisticsCompact.addAll(foldStats); + Map overallStats = new TreeMap(); for (String currentAnnotationType : annotationTypes) { - AnnotationStatistics currentAnnotationStatistics = new AnnotationStatistics(); - overallStats.put(currentAnnotationType, currentAnnotationStatistics); + AnnotationStatisticsCompact currentAnnotationStatisticsCompact = new AnnotationStatisticsCompact(); + overallStats.put(currentAnnotationType, currentAnnotationStatisticsCompact); } - for (Map singleFoldMap : foldStats) + for (Map singleFoldMap : foldStats) { for (String currentAnnotationType : annotationTypes) { - AnnotationStatistics currentFoldStatistics = singleFoldMap.get(currentAnnotationType); + AnnotationStatisticsCompact currentFoldStatistics = singleFoldMap.get(currentAnnotationType); overallStats.get(currentAnnotationType).addAll(currentFoldStatistics); } } @@ -398,7 +397,7 @@ private static Logger logger = Logger.ge } logger.debug("testFiles.size() = " + testFiles.size()); CollectionReader testCollectionReader = evaluation.getCollectionReader(testFiles); - Map stats = evaluation.test(testCollectionReader, modelsDir); + Map stats = evaluation.test(testCollectionReader, modelsDir); AssertionEvaluation.printScore(stats, modelsDir.getAbsolutePath()); } @@ -476,12 +475,12 @@ private static void printOptionsForDebug logger.info(message); } -public static void printScore(Map map, String directory) +public static void printScore(Map map, String directory) { - for (Map.Entry currentEntry : map.entrySet()) + for (Map.Entry currentEntry : map.entrySet()) { String annotationType = currentEntry.getKey(); - AnnotationStatistics stats = currentEntry.getValue(); + AnnotationStatisticsCompact stats = currentEntry.getValue(); System.out.format("directory: \"%s\"; assertion type: %s%n%n%s%n%n", directory, @@ -491,11 +490,12 @@ public static void printScore(Map test(CollectionReader collectionReader, File directory) + protected Map test(CollectionReader collectionReader, File directory) throws Exception { // AnalysisEngine classifierAnnotator = AnalysisEngineFactory.createPrimitive(AssertionCleartkAnalysisEngine.getDescription( // GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, @@ -839,14 +839,14 @@ public static void printScore(Map map = new TreeMap(); + Map map = new TreeMap(); if (!options.ignorePolarity) { map.put("polarity", polarityStats); @@ -921,8 +921,8 @@ public static void printScore(MapannotationToSpan(), - AnnotationStatistics.annotationToFeatureValue("polarity")); + AnnotationStatisticsCompact.annotationToSpan(), + AnnotationStatisticsCompact.annotationToFeatureValue("polarity")); if(options.printErrors){ printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "polarity", CONST.NE_POLARITY_NEGATION_PRESENT, Integer.class); } @@ -931,8 +931,8 @@ public static void printScore(MapannotationToSpan(), - AnnotationStatistics.annotationToFeatureValue("conditional")); + AnnotationStatisticsCompact.annotationToSpan(), + AnnotationStatisticsCompact.annotationToFeatureValue("conditional")); if(options.printErrors){ printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "conditional", CONST.NE_CONDITIONAL_TRUE, Boolean.class); } @@ -941,8 +941,8 @@ public static void printScore(MapannotationToSpan(), - AnnotationStatistics.annotationToFeatureValue("uncertainty")); + AnnotationStatisticsCompact.annotationToSpan(), + AnnotationStatisticsCompact.annotationToFeatureValue("uncertainty")); if(options.printErrors){ printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "uncertainty", CONST.NE_UNCERTAINTY_PRESENT, Integer.class); } @@ -951,8 +951,8 @@ public static void printScore(MapannotationToSpan(), - AnnotationStatistics.annotationToFeatureValue("subject")); + AnnotationStatisticsCompact.annotationToSpan(), + AnnotationStatisticsCompact.annotationToFeatureValue("subject")); if(options.printErrors){ printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "subject", null, CONST.ATTR_SUBJECT_PATIENT.getClass()); } @@ -961,8 +961,8 @@ public static void printScore(MapannotationToSpan(), - AnnotationStatistics.annotationToFeatureValue("generic")); + AnnotationStatisticsCompact.annotationToSpan(), + AnnotationStatisticsCompact.annotationToFeatureValue("generic")); if(options.printErrors){ printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "generic", CONST.NE_GENERIC_TRUE, Boolean.class); } @@ -972,8 +972,8 @@ public static void printScore(MapannotationToSpan(), - AnnotationStatistics.annotationToFeatureValue("historyOf")); + AnnotationStatisticsCompact.annotationToSpan(), + AnnotationStatisticsCompact.annotationToFeatureValue("historyOf")); if(options.printErrors){ printErrors(jCas, goldEntitiesAndEvents, systemEntitiesAndEvents, "historyOf", CONST.NE_HISTORY_OF_PRESENT, Integer.class); } @@ -993,37 +993,37 @@ public static void printScore(Map featureSelection; if (currentAssertionAttribute.equals("polarity")) { // TODO: parameterize the thresholds - featureSelection = PolarityCleartkAnalysisEngine.createFeatureSelection(1f); + featureSelection = PolarityCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold); featureSelection.train(instances); featureSelection.save(PolarityCleartkAnalysisEngine.createFeatureSelectionURI(directory)); } else if (currentAssertionAttribute.equals("uncertainty")) { // TODO: parameterize the thresholds - featureSelection = UncertaintyCleartkAnalysisEngine.createFeatureSelection(1f); + featureSelection = UncertaintyCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold); featureSelection.train(instances); featureSelection.save(UncertaintyCleartkAnalysisEngine.createFeatureSelectionURI(directory)); } else if (currentAssertionAttribute.equals("conditional")) { // TODO: parameterize the thresholds - featureSelection = ConditionalCleartkAnalysisEngine.createFeatureSelection(1f); + featureSelection = ConditionalCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold); featureSelection.train(instances); featureSelection.save(ConditionalCleartkAnalysisEngine.createFeatureSelectionURI(directory)); } else if (currentAssertionAttribute.equals("subject")) { // TODO: parameterize the thresholds - featureSelection = SubjectCleartkAnalysisEngine.createFeatureSelection(1f); + featureSelection = SubjectCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold); featureSelection.train(instances); featureSelection.save(SubjectCleartkAnalysisEngine.createFeatureSelectionURI(directory)); } else if (currentAssertionAttribute.equals("generic")) { // TODO: parameterize the thresholds - featureSelection = GenericCleartkAnalysisEngine.createFeatureSelection(1f); + featureSelection = GenericCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold); featureSelection.train(instances); featureSelection.save(GenericCleartkAnalysisEngine.createFeatureSelectionURI(directory)); } else if (currentAssertionAttribute.equals("historyOf")) { // TODO: parameterize the thresholds - featureSelection = HistoryCleartkAnalysisEngine.createFeatureSelection(1f); + featureSelection = HistoryCleartkAnalysisEngine.createFeatureSelection(options.featureSelectionThreshold); featureSelection.train(instances); featureSelection.save(HistoryCleartkAnalysisEngine.createFeatureSelectionURI(directory)); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/ConditionalCleartkAnalysisEngine.java Tue Jul 30 13:37:30 2013 @@ -72,7 +72,7 @@ public class ConditionalCleartkAnalysisE } } public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold); + return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); // return new MutualInformationFeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/GenericCleartkAnalysisEngine.java Tue Jul 30 13:37:30 2013 @@ -94,7 +94,7 @@ public class GenericCleartkAnalysisEngin } } public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold); + return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); // return new MutualInformationFeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/HistoryCleartkAnalysisEngine.java Tue Jul 30 13:37:30 2013 @@ -97,7 +97,7 @@ public class HistoryCleartkAnalysisEngin } } public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold); + return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); // return new MutualInformationFeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/PolarityCleartkAnalysisEngine.java Tue Jul 30 13:37:30 2013 @@ -93,7 +93,7 @@ public class PolarityCleartkAnalysisEngi } } public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold); + return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); // return new MutualInformationFeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/SubjectCleartkAnalysisEngine.java Tue Jul 30 13:37:30 2013 @@ -97,7 +97,7 @@ public class SubjectCleartkAnalysisEngin } } public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold); + return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); // return new MutualInformationFeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/UncertaintyCleartkAnalysisEngine.java Tue Jul 30 13:37:30 2013 @@ -79,7 +79,7 @@ public class UncertaintyCleartkAnalysisE } public static FeatureSelection createFeatureSelection(double threshold) { - return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold); + return new Chi2FeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME, threshold, false); // return new MutualInformationFeatureSelection(AssertionCleartkAnalysisEngine.FEATURE_SELECTION_NAME); } Modified: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java?rev=1508435&r1=1508434&r2=1508435&view=diff ============================================================================== --- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java (original) +++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/train/CrossValidateChi2GridSearch.java Tue Jul 30 13:37:30 2013 @@ -14,11 +14,11 @@ public class CrossValidateChi2GridSearch AssertionEvaluation.useEvaluationLogFile = true; - float[] threshs = {2f, 5f, 10f, 20f, 50f, 100f}; + float[] threshs = {1f, 5f, 10f, 50f, 100f}; for (Float chi2threshold : threshs ) { System.out.println("BEGIN Chi2 Grid Search with threshold = "+ Float.toString(chi2threshold)); - AssertionEvaluation.evaluationLogFileOut.write("BEGIN Chi2 Grid Search with threshold = "+ Float.toString(chi2threshold)); - AssertionEvaluation.evaluationLogFileOut.flush(); +// AssertionEvaluation.evaluationLogFileOut.write("BEGIN Chi2 Grid Search with threshold = "+ Float.toString(chi2threshold)+"\n"); +// AssertionEvaluation.evaluationLogFileOut.flush(); for (String attribute : AssertionConst.annotationTypes) {