Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 51DE61023D for ; Thu, 13 Mar 2014 20:03:43 +0000 (UTC) Received: (qmail 25744 invoked by uid 500); 13 Mar 2014 20:03:40 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 25705 invoked by uid 500); 13 Mar 2014 20:03:39 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 25697 invoked by uid 99); 13 Mar 2014 20:03:39 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 13 Mar 2014 20:03:39 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 13 Mar 2014 20:03:33 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id AD39B238897A; Thu, 13 Mar 2014 20:03:10 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1577303 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/ ae/feature/treekernel/ eval/ Date: Thu, 13 Mar 2014 20:03:10 -0000 To: commits@ctakes.apache.org From: clin@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140313200310.AD39B238897A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: clin Date: Thu Mar 13 20:03:10 2014 New Revision: 1577303 URL: http://svn.apache.org/r1577303 Log: create tree-kernel-version DocTimeRel evaluation code, which will call DocTimeRelWithTreeAnnotator. 
Tried to derive a single constituent tree and a simple dependency tree. Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java (with props) ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java (with props) ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java (with props) ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java (with props) Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java?rev=1577303&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java Thu Mar 13 20:03:10 2014 @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.ae; + +import java.io.File; +import java.util.List; + +import org.apache.ctakes.temporal.ae.feature.ClosestVerbExtractor; +import org.apache.ctakes.temporal.ae.feature.DateAndMeasurementExtractor; +import org.apache.ctakes.temporal.ae.feature.EventPropertyExtractor; +import org.apache.ctakes.temporal.ae.feature.NearbyVerbTenseXExtractor; +import org.apache.ctakes.temporal.ae.feature.SectionHeaderExtractor; +import org.apache.ctakes.temporal.ae.feature.TimeXExtractor; +import org.apache.ctakes.temporal.ae.feature.treekernel.DependencySingleTreeExtractor; +import org.apache.ctakes.temporal.ae.feature.treekernel.SyntaticSingleTreeExtractor; +import org.apache.ctakes.typesystem.type.syntax.BaseToken; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.uima.UimaContext; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.cleartk.classifier.CleartkAnnotator; +import org.cleartk.classifier.DataWriter; +import org.cleartk.classifier.Feature; +import org.cleartk.classifier.Instance; +import org.cleartk.classifier.feature.extractor.CleartkExtractor; +import org.cleartk.classifier.feature.extractor.CleartkExtractor.Covered; +import org.cleartk.classifier.feature.extractor.CleartkExtractor.Following; +import org.cleartk.classifier.feature.extractor.CleartkExtractor.Preceding; +import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor; +import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor; +import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor; +import org.cleartk.classifier.jar.DefaultDataWriterFactory; +import 
org.cleartk.classifier.jar.DirectoryDataWriterFactory; +import org.cleartk.classifier.jar.GenericJarClassifierFactory; +import org.uimafit.factory.AnalysisEngineFactory; +import org.uimafit.util.JCasUtil; + +public class DocTimeRelWithTreeAnnotator extends CleartkAnnotator { + + public static AnalysisEngineDescription createDataWriterDescription( + Class> dataWriterClass, + File outputDirectory) throws ResourceInitializationException { + return AnalysisEngineFactory.createPrimitiveDescription( + DocTimeRelWithTreeAnnotator.class, + CleartkAnnotator.PARAM_IS_TRAINING, + true, + DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, + dataWriterClass, + DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, + outputDirectory); + } + + public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory) + throws ResourceInitializationException { + return AnalysisEngineFactory.createPrimitiveDescription( + DocTimeRelWithTreeAnnotator.class, + CleartkAnnotator.PARAM_IS_TRAINING, + false, + GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, + new File(modelDirectory, "model.jar")); + } + + private CleartkExtractor contextExtractor; + private NearbyVerbTenseXExtractor verbTensePatternExtractor; + private SectionHeaderExtractor sectionIDExtractor; + private ClosestVerbExtractor closestVerbExtractor; + private TimeXExtractor timeXExtractor; + private EventPropertyExtractor genericExtractor; + private DateAndMeasurementExtractor dateExtractor; + private DependencySingleTreeExtractor depTreeEctractor; + private SyntaticSingleTreeExtractor singleTreeExtractor; + + @Override + public void initialize(UimaContext context) throws ResourceInitializationException { + super.initialize(context); + CombinedExtractor baseExtractor = new CombinedExtractor( + new CoveredTextExtractor(), + new TypePathExtractor(BaseToken.class, "partOfSpeech")); + this.contextExtractor = new CleartkExtractor( + BaseToken.class, + baseExtractor, + new Preceding(3), + new Covered(), + new 
Following(3)); + this.verbTensePatternExtractor = new NearbyVerbTenseXExtractor(); + this.sectionIDExtractor = new SectionHeaderExtractor(); + this.closestVerbExtractor = new ClosestVerbExtractor(); + this.timeXExtractor = new TimeXExtractor(); + this.genericExtractor = new EventPropertyExtractor(); + this.dateExtractor = new DateAndMeasurementExtractor(); + this.depTreeEctractor = new DependencySingleTreeExtractor(); + this.singleTreeExtractor = new SyntaticSingleTreeExtractor(); + } + + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + for (EventMention eventMention : JCasUtil.select(jCas, EventMention.class)) { + if (eventMention.getEvent() != null) { + List features = this.contextExtractor.extract(jCas, eventMention); + features.addAll(this.verbTensePatternExtractor.extract(jCas, eventMention));//add nearby verb POS pattern feature + features.addAll(this.sectionIDExtractor.extract(jCas, eventMention)); //add section heading + features.addAll(this.closestVerbExtractor.extract(jCas, eventMention)); //add closest verb + features.addAll(this.timeXExtractor.extract(jCas, eventMention)); //add the closest time expression types + features.addAll(this.genericExtractor.extract(jCas, eventMention)); //add the closest time expression types + features.addAll(this.dateExtractor.extract(jCas, eventMention)); //add the closest NE type + features.addAll(this.depTreeEctractor.extract(jCas, eventMention)); + features.addAll(this.singleTreeExtractor.extract(jCas, eventMention));//add the single tree that covers the event. 
+ if (this.isTraining()) { + String outcome = eventMention.getEvent().getProperties().getDocTimeRel(); + this.dataWriter.write(new Instance(outcome, features)); + } else { + String outcome = this.classifier.classify(features); + eventMention.getEvent().getProperties().setDocTimeRel(outcome); + } + } + } + } +} Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DocTimeRelWithTreeAnnotator.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java?rev=1577303&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java Thu Mar 13 20:03:10 2014 @@ -0,0 +1,105 @@ +package org.apache.ctakes.temporal.ae.feature.treekernel; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.ctakes.dependency.parser.util.AnnotationDepUtils; +import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.ctakes.typesystem.type.textspan.Sentence; +import org.apache.ctakes.utils.tree.SimpleTree; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.cleartk.classifier.Feature; +import org.cleartk.classifier.TreeFeature; +import org.cleartk.classifier.feature.extractor.CleartkExtractorException; +import 
org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor; +import org.uimafit.util.JCasUtil; +/** + * Given a focused annotation, get the whole sentence-level dependency tree that cover this annotation. + * @author CH151862 + * + */ +public class DependencySingleTreeExtractor implements SimpleFeatureExtractor { + + public static final String FEAT_NAME = "TK_DepSingleT"; + + @Override +// public List extract(JCas view, Annotation focusAnnotation) +// throws CleartkExtractorException { +// List features = new ArrayList(); +// //1 generate event annotation array and label array +// Annotation[] annotations = {focusAnnotation}; +// String[] labels ={"EVENT"}; +// +// //2 get covering sentence: +// Map> coveringMap = +// JCasUtil.indexCovering(view, EventMention.class, Sentence.class); +// EventMention targetTokenAnnotation = (EventMention)focusAnnotation; +// Collection sentList = coveringMap.get(targetTokenAnnotation); +// +// //3 extract trees: +// String dtreeStr ="(TOP (EVENT " + focusAnnotation.getCoveredText().trim() + "))"; +// if (sentList != null && !sentList.isEmpty()){ +// for(Sentence sent : sentList) { +// List nodes = JCasUtil.selectCovered(view, ConllDependencyNode.class, sent); +// +// if(nodes!=null && !nodes.isEmpty()){ +// String treeStr = AnnotationDepUtils.getTokenTreeString(view, nodes, annotations, labels, true); +// if(treeStr != null){ +// dtreeStr = treeStr; +// break; +// } +// } +// } +// } +// +// features.add(new TreeFeature(FEAT_NAME, dtreeStr)); +// +// return features; +// } + + public List extract(JCas view, Annotation focusAnnotation) + throws CleartkExtractorException { + List features = new ArrayList(); + String dtreeStr ="(TOP (EVENT " + focusAnnotation.getCoveredText().trim() + "))"; + //find the colldepnode covered by focusAnnotation: + for(ConllDependencyNode node : JCasUtil.selectCovered(view, ConllDependencyNode.class, focusAnnotation)){ + //find if it has head: + ConllDependencyNode head = node.getHead(); + 
SimpleTree curTree = null; + SimpleTree headTree = null; + + if(head == null) { //if the current node is the root, then not right + continue; + } + +// curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), SimpleTree.escapeCat(node.getCoveredText().trim()))); + curTree = SimpleTree.fromString(String.format("(%s %s)", node.getDeprel(), node.getPostag())); + + + + while(head.getHead() != null){ //while head node is not the root +// String token = node.getHead().getHead() == null ? "TOP" : node.getHead().getCoveredText(); +// headTree = SimpleTree.fromString(String.format("(%s %s)", head.getDeprel(), SimpleTree.escapeCat(head.getCoveredText().trim()))); + headTree = SimpleTree.fromString(String.format("(%s %s)", head.getDeprel(), head.getPostag())); + curTree.parent = headTree.children.get(0); + headTree.children.get(0).addChild(curTree); + curTree = headTree; + head = head.getHead(); + } + if(headTree==null){ + curTree = SimpleTree.fromString(String.format("(%s (%s %s))",node.getDeprel(), node.getPostag(),"null")); + dtreeStr = curTree.toString(); + }else{ + dtreeStr = headTree.toString(); + } + break; + } + + features.add(new TreeFeature(FEAT_NAME, dtreeStr)); + return features; + } +} Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/DependencySingleTreeExtractor.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java?rev=1577303&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java (added) 
+++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java Thu Mar 13 20:03:10 2014 @@ -0,0 +1,58 @@ +package org.apache.ctakes.temporal.ae.feature.treekernel; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.ctakes.constituency.parser.treekernel.TreeExtractor; +import org.apache.ctakes.constituency.parser.util.AnnotationTreeUtils; +import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.ctakes.utils.tree.SimpleTree; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.cleartk.classifier.Feature; +import org.cleartk.classifier.TreeFeature; +import org.cleartk.classifier.feature.extractor.CleartkExtractorException; +import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor; +/** + * Given a focused annotation, get the whole sentence-level parse tree that cover this annotation. + * @author CH151862 + * + */ +public class SyntaticSingleTreeExtractor implements SimpleFeatureExtractor { + + public static final String FEAT_NAME = "TK_SingleT"; + + @Override + public List extract(JCas view, Annotation focusAnnotation) + throws CleartkExtractorException { + List features = new ArrayList(); + // first get the root and print it out... 
+ TopTreebankNode root = AnnotationTreeUtils.getTreeCopy(view, AnnotationTreeUtils.getAnnotationTree(view, focusAnnotation)); + + if(root == null){ + SimpleTree fakeTree = new SimpleTree("(S (NN null))"); + features.add(new TreeFeature(FEAT_NAME, fakeTree.toString())); + return features; + } + + + String etype=""; + String eventModality=""; + + if(focusAnnotation instanceof EventMention){ + eventModality = ((EventMention)focusAnnotation).getEvent().getProperties().getContextualModality(); + etype = "EVENT-"+eventModality; + AnnotationTreeUtils.insertAnnotationNode(view, root, focusAnnotation, etype); + } + + SimpleTree tree = null; + tree = TreeExtractor.getSimpleClone(root); + + TemporalPETExtractor.moveTimexDownToNP(tree); + + features.add(new TreeFeature(FEAT_NAME, tree.toString())); + return features; + } + +} Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/treekernel/SyntaticSingleTreeExtractor.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java?rev=1577303&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java Thu Mar 13 20:03:10 2014 @@ -0,0 +1,330 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.eval; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.FileHandler; +import java.util.logging.Formatter; +import java.util.logging.Level; +import java.util.logging.LogRecord; +import java.util.logging.Logger; + +//import org.apache.ctakes.temporal.ae.ContextualModalityAnnotator; +import org.apache.ctakes.temporal.ae.DocTimeRelWithTreeAnnotator; +import org.apache.ctakes.temporal.eval.EvaluationOfEventTimeRelations.ParameterSettings; +import org.apache.ctakes.typesystem.type.refsem.EventProperties; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.ctakes.typesystem.type.textsem.TimeMention; +import org.apache.ctakes.typesystem.type.textspan.Segment; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Feature; +import org.apache.uima.collection.CollectionReader; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.FileUtils; +import org.cleartk.classifier.jar.JarClassifierBuilder; +//import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter; +import 
org.cleartk.classifier.tksvmlight.model.CompositeKernel.ComboOperator; +//import org.cleartk.classifier.liblinear.LIBLINEARStringOutcomeDataWriter; +import org.cleartk.eval.AnnotationStatistics; +import org.cleartk.ml.libsvm.tk.TKLIBSVMStringOutcomeDataWriter; +import org.cleartk.util.ViewURIUtil; +import org.uimafit.component.JCasAnnotator_ImplBase; +import org.uimafit.factory.AggregateBuilder; +import org.uimafit.factory.AnalysisEngineFactory; +import org.uimafit.pipeline.JCasIterable; +import org.uimafit.pipeline.SimplePipeline; +//import org.uimafit.testing.util.HideOutput; +import org.uimafit.util.JCasUtil; + +import com.google.common.base.Function; +import com.google.common.collect.Maps; +import com.lexicalscope.jewel.cli.CliFactory; +import com.lexicalscope.jewel.cli.Option; + +public class EvaluationOfEventPropertiesTk extends +Evaluation_ImplBase>> { + static interface TempRelOptions extends Evaluation_ImplBase.Options{ + @Option + public boolean getPrintFormattedRelations(); + + @Option + public boolean getBaseline(); + + @Option + public boolean getClosure(); + + @Option + public boolean getUseTmp(); + + @Option + public boolean getUseGoldAttributes(); + } + + private static final String DOC_TIME_REL = "docTimeRel"; + private static final String CONTEXTUAL_MODALITY = "contextualModality"; + + private static final List PROPERTY_NAMES = Arrays.asList(DOC_TIME_REL, CONTEXTUAL_MODALITY); + + protected static boolean DEFAULT_BOTH_DIRECTIONS = false; + protected static float DEFAULT_DOWNSAMPLE = 1.0f; + protected static ParameterSettings allParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "tk", + 10.0, 1.0, "polynomial", ComboOperator.SUM, 1, 0.4); + public static void main(String[] args) throws Exception { + ParameterSettings params = allParams; + TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args); + List patientSets = options.getPatients().getList(); + List trainItems = 
THYMEData.getTrainPatientSets(patientSets); + List devItems = THYMEData.getDevPatientSets(patientSets); + List testItems = THYMEData.getTestPatientSets(patientSets); + + try{ + File workingDir = new File("target/eval/event-properties"); + if(!workingDir.exists()) workingDir.mkdirs(); + if(options.getUseTmp()){ + File tempModelDir = File.createTempFile("temporal", null, workingDir); + tempModelDir.delete(); + tempModelDir.mkdir(); + workingDir = tempModelDir; + } + + EvaluationOfEventPropertiesTk evaluation = new EvaluationOfEventPropertiesTk( + workingDir, + options.getRawTextDirectory(), + options.getXMLDirectory(), + options.getXMLFormat(), + options.getXMIDirectory(), + options.getKernelParams(), + params); + evaluation.prepareXMIsFor(patientSets); + evaluation.logClassificationErrors(workingDir, "ctakes-event-property-errors"); + + List training = trainItems; + List testing = null; + if(options.getTest()){ + training.addAll(devItems); + testing = testItems; + }else{ + testing = devItems; + } + Map> stats = evaluation.trainAndTest(training, testing); + + for (String name : PROPERTY_NAMES) { + System.err.println("===================="); + System.err.println(name); + System.err.println("--------------------"); + System.err.println(stats.get(name)); + } + if(options.getUseTmp()){ + // won't work because it's not empty. should we be concerned with this or is it responsibility of + // person invoking the tmp flag? 
+ FileUtils.deleteRecursive(workingDir); + } + }catch(ResourceInitializationException e){ + System.err.println("Error with parameter settings: " + params); + e.printStackTrace(); + } + } + + private Map loggers = Maps.newHashMap(); + protected ParameterSettings params = null; + + public EvaluationOfEventPropertiesTk( + File baseDirectory, + File rawTextDirectory, + File xmlDirectory, + XMLFormat xmlFormat, + File xmiDirectory, + String kernelParams, + ParameterSettings params) { + super(baseDirectory, rawTextDirectory, xmlDirectory, xmlFormat, xmiDirectory, null); + this.params = params; + this.kernelParams = kernelParams == null ? null : kernelParams.split(" "); + for (String name : PROPERTY_NAMES) { + this.loggers.put(name, Logger.getLogger(String.format("%s.%s", this.getClass().getName(), name))); + } + } + + @Override + protected void train(CollectionReader collectionReader, File directory) throws Exception { + AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); + aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class)); + aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class)); + aggregateBuilder.add(DocTimeRelWithTreeAnnotator.createDataWriterDescription( + // LIBSVMStringOutcomeDataWriter.class, + TKLIBSVMStringOutcomeDataWriter.class, + new File(directory, DOC_TIME_REL))); + // aggregateBuilder.add(ContextualModalityAnnotator.createDataWriterDescription(LIBSVMStringOutcomeDataWriter.class, new File(directory, CONTEXTUAL_MODALITY))); + SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate()); + // for(String propertyName : PROPERTY_NAMES){ + // JarClassifierBuilder.trainAndPackage(new File(directory, propertyName), "-h","0","-c", "1000"); + // } + String[] optArray; + + if(this.kernelParams == null){ + ArrayList svmOptions = new ArrayList(); + svmOptions.add("-c"); svmOptions.add(""+params.svmCost); // svm cost + svmOptions.add("-t"); svmOptions.add(""+params.svmKernelIndex); // kernel 
index + svmOptions.add("-d"); svmOptions.add("3"); // degree parameter for polynomial + svmOptions.add("-g"); svmOptions.add(""+params.svmGamma); + if(params.svmKernelIndex==ParameterSettings.SVM_KERNELS.indexOf("tk")){ + svmOptions.add("-S"); svmOptions.add(""+params.secondKernelIndex); // second kernel index (similar to -t) for composite kernel + String comboFlag = (params.comboOperator == ComboOperator.SUM ? "+" : params.comboOperator == ComboOperator.PRODUCT ? "*" : params.comboOperator == ComboOperator.TREE_ONLY ? "T" : "V"); + svmOptions.add("-C"); svmOptions.add(comboFlag); + svmOptions.add("-L"); svmOptions.add(""+params.lambda); + svmOptions.add("-T"); svmOptions.add(""+params.tkWeight); + svmOptions.add("-N"); svmOptions.add("3"); // normalize trees and features + } + optArray = svmOptions.toArray(new String[]{}); + }else{ + optArray = this.kernelParams; + for(int i = 0; i < optArray.length; i+=2){ + optArray[i] = "-" + optArray[i]; + } + } + + // HideOutput hider = new HideOutput(); + JarClassifierBuilder.trainAndPackage(new File(directory,DOC_TIME_REL), optArray); + // JarClassifierBuilder.trainAndPackage(new File(directory,CONTEXTUAL_MODALITY), "-h","0","-c", "1000"); + // hider.restoreOutput(); + // hider.close(); + } + + @Override + protected Map> test( + CollectionReader collectionReader, + File directory) throws Exception { + AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); + aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class)); + aggregateBuilder.add(CopyFromGold.getDescription(TimeMention.class)); + aggregateBuilder.add(AnalysisEngineFactory.createPrimitiveDescription(ClearEventProperties.class)); + aggregateBuilder.add(DocTimeRelWithTreeAnnotator.createAnnotatorDescription(new File(directory, DOC_TIME_REL))); + // aggregateBuilder.add(ContextualModalityAnnotator.createAnnotatorDescription(new File(directory, CONTEXTUAL_MODALITY))); + + Function eventMentionToSpan = 
AnnotationStatistics.annotationToSpan(); + Map> propertyGetters; + propertyGetters = new HashMap>(); + for (String name : PROPERTY_NAMES) { + propertyGetters.put(name, getPropertyGetter(name)); + } + + Map> statsMap = new HashMap>(); + + for(String propertyName : PROPERTY_NAMES){ + statsMap.put(propertyName, new AnnotationStatistics()); + } + + for (JCas jCas : new JCasIterable(collectionReader, aggregateBuilder.createAggregate())) { + JCas goldView = jCas.getView(GOLD_VIEW_NAME); + JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA); + String text = goldView.getDocumentText(); + for (Segment segment : JCasUtil.select(jCas, Segment.class)) { + if (!THYMEData.SEGMENTS_TO_SKIP.contains(segment.getId())) { + List goldEvents = selectExact(goldView, EventMention.class, segment); + List systemEvents = selectExact(systemView, EventMention.class, segment); + for (String name : PROPERTY_NAMES) { + this.loggers.get(name).fine("Errors in : " + ViewURIUtil.getURI(jCas).toString()); + Function getProperty = propertyGetters.get(name); + statsMap.get(name).add( + goldEvents, + systemEvents, + eventMentionToSpan, + getProperty); + for (int i = 0; i < goldEvents.size(); ++i) { + String goldOutcome = getProperty.apply(goldEvents.get(i)); + String systemOutcome = getProperty.apply(systemEvents.get(i)); + if (!goldOutcome.equals(systemOutcome)) { + EventMention event = goldEvents.get(i); + int begin = event.getBegin(); + int end = event.getEnd(); + int windowBegin = Math.max(0, begin - 50); + int windowEnd = Math.min(text.length(), end + 50); + this.loggers.get(name).fine(String.format( + "%s was %s but should be %s, in ...%s[!%s!:%d-%d]%s...", + name, + systemOutcome, + goldOutcome, + text.substring(windowBegin, begin).replaceAll("[\r\n]", " "), + text.substring(begin, end), + begin, + end, + text.substring(end, windowEnd).replaceAll("[\r\n]", " "))); + } + } + } + } + } + } + return statsMap; + } + + public void logClassificationErrors(File outputDir, String outputFilePrefix) 
throws IOException { + if (!outputDir.exists()) { + outputDir.mkdirs(); + } + for (String name : PROPERTY_NAMES) { + Logger logger = this.loggers.get(name); + logger.setLevel(Level.FINE); + File outputFile = new File(outputDir, String.format("%s.%s.log", outputFilePrefix, name)); + FileHandler handler = new FileHandler(outputFile.getPath()); + handler.setFormatter(new Formatter() { + @Override + public String format(LogRecord record) { + return record.getMessage() + '\n'; + } + }); + logger.addHandler(handler); + } + } + + private static Function getPropertyGetter(final String propertyName) { + return new Function() { + @Override + public String apply(EventMention eventMention) { + EventProperties eventProperties = eventMention.getEvent().getProperties(); + Feature feature = eventProperties.getType().getFeatureByBaseName(propertyName); + return eventProperties.getFeatureValueAsString(feature); + } + }; + } + + public static class ClearEventProperties extends JCasAnnotator_ImplBase { + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + for (EventProperties eventProperties : JCasUtil.select(jCas, EventProperties.class)) { + eventProperties.setAspect(null); + eventProperties.setCategory(null); + eventProperties.setContextualAspect(null); + eventProperties.setContextualModality(null); + eventProperties.setDegree(null); + eventProperties.setDocTimeRel(null); + eventProperties.setPermanence(null); + eventProperties.setPolarity(0); + } + } + + } +} Propchange: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventPropertiesTk.java ------------------------------------------------------------------------------ svn:mime-type = text/plain