Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 18D84200B82 for ; Fri, 16 Sep 2016 23:26:46 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 17B7D160AB7; Fri, 16 Sep 2016 21:26:46 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 52B17160AC4 for ; Fri, 16 Sep 2016 23:26:44 +0200 (CEST) Received: (qmail 91492 invoked by uid 500); 16 Sep 2016 21:26:43 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 91477 invoked by uid 99); 16 Sep 2016 21:26:43 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 16 Sep 2016 21:26:43 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id 0029FC11CD for ; Fri, 16 Sep 2016 21:26:43 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 0.676 X-Spam-Level: X-Spam-Status: No, score=0.676 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-1.124] autolearn=disabled Received: from mx1-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id y-9xVFhP6lOx for ; Fri, 16 Sep 2016 21:26:40 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-lw-us.apache.org (ASF Mail Server at mx1-lw-us.apache.org) with ESMTP id 706955F480 for ; Fri, 16 Sep 2016 21:26:40 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id A32EDE002B for ; Fri, 16 Sep 2016 21:26:39 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 87E7A3A0248 for ; Fri, 16 Sep 2016 21:26:39 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1761094 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal: ae/DeepPheAnaforaXMLReader.java eval/EvaluationOfEventDocTimeRelDeepPhe.java Date: Fri, 16 Sep 2016 21:26:39 -0000 To: commits@ctakes.apache.org From: clin@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160916212639.87E7A3A0248@svn01-us-west.apache.org> archived-at: Fri, 16 Sep 2016 21:26:46 -0000 Author: clin Date: Fri Sep 16 21:26:38 2016 New Revision: 1761094 URL: http://svn.apache.org/viewvc?rev=1761094&view=rev Log: write evaluation code to load deepPhe docTimeRel data Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java?rev=1761094&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/DeepPheAnaforaXMLReader.java Fri Sep 16 21:26:38 2016 @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.ae; + +import java.io.File; +import java.io.IOException; +import java.net.MalformedURLException; +import java.util.List; + +import org.apache.ctakes.typesystem.type.constants.CONST; +import org.apache.ctakes.typesystem.type.refsem.Event; +import org.apache.ctakes.typesystem.type.refsem.EventProperties; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.log4j.Logger; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.analysis_engine.AnalysisEngineDescription; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.collection.CollectionReader; +import org.apache.uima.fit.component.JCasAnnotator_ImplBase; +import org.apache.uima.fit.descriptor.ConfigurationParameter; +import org.apache.uima.fit.factory.AnalysisEngineFactory; +import org.apache.uima.fit.pipeline.SimplePipeline; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.cas.FSArray; +import org.apache.uima.resource.ResourceInitializationException; +import org.cleartk.util.ViewUriUtil; +import org.cleartk.util.cr.UriCollectionReader; +import org.jdom2.Element; +import org.jdom2.JDOMException; +import org.jdom2.input.SAXBuilder; + +import com.google.common.collect.Lists; + +public class DeepPheAnaforaXMLReader extends JCasAnnotator_ImplBase { + private static Logger LOGGER = Logger.getLogger(DeepPheAnaforaXMLReader.class); + + public static final String PARAM_ANAFORA_DIRECTORY = "anaforaDirectory"; + + @ConfigurationParameter( + name = PARAM_ANAFORA_DIRECTORY, + description = "root directory of the Anafora-annotated files, with one subdirectory for " + + "each annotated file") + private File anaforaDirectory; + + public static final String PARAM_ANAFORA_XML_SUFFIXES = "anaforaSuffixes"; + + @ConfigurationParameter( + name = PARAM_ANAFORA_XML_SUFFIXES, + mandatory = false, + description = "list of suffixes that might be added to a file name to identify the Anafora " + + "XML annotations file; only the first suffix corresponding to a file will be used") + private String[] anaforaXMLSuffixes = new String[] { + ".UmlsDeepPhe.dave.completed.xml"}; + + public static AnalysisEngineDescription getDescription() throws ResourceInitializationException { + return AnalysisEngineFactory.createEngineDescription(DeepPheAnaforaXMLReader.class); + } + + public static AnalysisEngineDescription getDescription(File anaforaDirectory) + throws ResourceInitializationException { + return AnalysisEngineFactory.createEngineDescription( + DeepPheAnaforaXMLReader.class, + DeepPheAnaforaXMLReader.PARAM_ANAFORA_DIRECTORY, + anaforaDirectory); + } + + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + // determine source text file + File textFile = new File(ViewUriUtil.getURI(jCas)); + LOGGER.info("processing " + textFile); + + // determine possible Anafora XML file names + File corefFile = new File(textFile.getPath() + ".UmlsDeepPhe.dave.completed.xml"); + List possibleXMLFiles = Lists.newArrayList(); + for (String anaforaXMLSuffix : this.anaforaXMLSuffixes) { + if (this.anaforaDirectory == null) { + possibleXMLFiles.add(new File(textFile + anaforaXMLSuffix)); + } else { + possibleXMLFiles.add(new File(textFile.getPath() + anaforaXMLSuffix)); + } + } + + // find an Anafora XML file that actually exists + File xmlFile = null; + for (File possibleXMLFile : possibleXMLFiles) { + if (possibleXMLFile.exists()) { + xmlFile = possibleXMLFile; + break; + } + } + if (this.anaforaXMLSuffixes.length > 0 && xmlFile == null) { + throw new IllegalArgumentException("no Anafora XML file found from " + possibleXMLFiles); + } + + if(xmlFile != null){ + processXmlFile(jCas, xmlFile); + } + if(corefFile.exists()){ + processXmlFile(jCas, corefFile); + } + } + + private static void processXmlFile(JCas jCas, File xmlFile) throws AnalysisEngineProcessException{ + // load the XML + Element dataElem; + try { + dataElem = new SAXBuilder().build(xmlFile.toURI().toURL()).getRootElement(); + } catch (MalformedURLException e) { + throw new AnalysisEngineProcessException(e); + } catch (JDOMException e) { + throw new AnalysisEngineProcessException(e); + } catch (IOException e) { + throw new AnalysisEngineProcessException(e); + } + + int curEventId = 1; + int docLen = jCas.getDocumentText().length(); + + for (Element annotationsElem : dataElem.getChildren("annotations")) { + + for (Element entityElem : annotationsElem.getChildren("entity")) { + String id = removeSingleChildText(entityElem, "id", null); + Element spanElem = removeSingleChild(entityElem, "span", id); + String type = removeSingleChildText(entityElem, "type", id); + String parType = removeSingleChildText(entityElem, "parentsType", id); + Element propertiesElem = removeSingleChild(entityElem, "properties", id); + + // UIMA doesn't support disjoint spans, so take the span enclosing + // everything + int begin = Integer.MAX_VALUE; + int end = Integer.MIN_VALUE; + for (String spanString : spanElem.getText().split(";")) { + String[] beginEndStrings = spanString.split(","); + if (beginEndStrings.length != 2) { + error("span not of the format 'number,number'", id); + } + int spanBegin = Integer.parseInt(beginEndStrings[0]); + int spanEnd = Integer.parseInt(beginEndStrings[1]); + if (spanBegin < begin) { + begin = spanBegin; + } + if (spanEnd > end) { + end = spanEnd; + } + } + if(begin < 0 || end >= docLen){ + error("Illegal begin or end boundary", id); + continue; + } + + if (!type.equals("Anatomical_site") && parType.equals("UMLSEntities") || parType.equals("Metastasis_Entities")) { + String docTimeRel = removeSingleChildText(propertiesElem, "DocTimeRel", id); + if (docTimeRel == null) { + error("no docTimeRel, assuming OVERLAP", id); +// docTimeRel = "OVERLAP"; + continue; + } + EventMention eventMention = new EventMention(jCas, begin, end); + Event event = new Event(jCas); + EventProperties eventProperties = new EventProperties(jCas); + eventProperties.setDocTimeRel(docTimeRel); + eventProperties.setCategory(type); + eventProperties.addToIndexes(); + event.setConfidence(1.0f); + event.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION); + event.setProperties(eventProperties); + event.setMentions(new FSArray(jCas, 1)); + event.setMentions(0, eventMention); + event.addToIndexes(); + eventMention.setId(curEventId++); + eventMention.setConfidence(1.0f); + eventMention.setDiscoveryTechnique(CONST.NE_DISCOVERY_TECH_GOLD_ANNOTATION); + eventMention.setEvent(event); + eventMention.addToIndexes(); + } +// else if (type.equals("TIMEX3")) { +// String timeClass = removeSingleChildText(propertiesElem, "Class", id); +// TimeMention timeMention = new TimeMention(jCas, begin, end); +// timeMention.setId(curTimexId++); +// timeMention.setTimeClass(timeClass); +// timeMention.addToIndexes(); +// annotation = timeMention; +// +// } else if (type.equals("DOCTIME")) { +// TimeMention timeMention = new TimeMention(jCas, begin, end); +// timeMention.setId(curTimexId++); +// timeMention.setTimeClass(type); +// timeMention.addToIndexes(); +// annotation = timeMention; +// +// } else if (type.equals("SECTIONTIME")) { +// TimeMention timeMention = new TimeMention(jCas, begin, end); +// timeMention.setId(curTimexId++); +// timeMention.setTimeClass(type); +// timeMention.addToIndexes(); +// annotation = timeMention; +// +// } else if (type.equals("Markable")) { +// while(end >= begin && (jCas.getDocumentText().charAt(end-1) == '\n' || jCas.getDocumentText().charAt(end-1) == '\r')){ +// end--; +// } +// Markable markable = new Markable(jCas, begin, end); +// markable.addToIndexes(); +// annotation = markable; +// +// } else if (type.equals("DUPLICATE")) { +// LOGGER.warn("Ignoring duplicate sections in annotations."); +// continue; +// } +// else { +// throw new UnsupportedOperationException("unsupported entity type: " + type); +// } +// +// // match the annotation to it's ID for later use +// idToAnnotation.put(id, annotation); + + // make sure all XML has been consumed + removeSingleChild(entityElem, "parentsType", id); + if (!propertiesElem.getChildren().isEmpty() || !entityElem.getChildren().isEmpty()) { + List children = Lists.newArrayList(); + for (Element child : propertiesElem.getChildren()) { + children.add(child.getName()); + } + for (Element child : entityElem.getChildren()) { + children.add(child.getName()); + } + error("unprocessed children " + children, id); + } + } + } + } + + private static Element getSingleChild(Element elem, String elemName, String causeID) { + List children = elem.getChildren(elemName); + if (children.size() != 1) { + error(String.format("not exactly one '%s' child", elemName), causeID); + } + return children.size() > 0 ? children.get(0) : null; + } + + private static Element removeSingleChild(Element elem, String elemName, String causeID) { + Element child = getSingleChild(elem, elemName, causeID); + elem.removeChildren(elemName); + return child; + } + + private static String removeSingleChildText(Element elem, String elemName, String causeID) { + Element child = getSingleChild(elem, elemName, causeID); + String text = null; + if(child != null){ + text = child.getText(); + } + if (text==null || text.isEmpty()) { + error(String.format("an empty '%s' child", elemName), causeID); + text = null; + } + elem.removeChildren(elemName); + return text; + } + + private static void error(String found, String id) { + LOGGER.error(String.format("found %s in annotation with ID %s", found, id)); + } + + public static void main(String[] args) throws Exception { + List files = Lists.newArrayList(); + for (String path : args) { + files.add(new File(path)); + } + CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(files); + AnalysisEngine engine = AnalysisEngineFactory.createEngine(DeepPheAnaforaXMLReader.class); + SimplePipeline.runPipeline(reader, engine); + } +} Added: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java?rev=1761094&view=auto ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java (added) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/EvaluationOfEventDocTimeRelDeepPhe.java Fri Sep 16 21:26:38 2016 @@ -0,0 +1,500 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.ctakes.temporal.eval; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.logging.FileHandler; +import java.util.logging.Formatter; +import java.util.logging.Level; +import java.util.logging.LogRecord; +import java.util.logging.Logger; + +import org.apache.ctakes.relationextractor.eval.SHARPXMI; +import org.apache.ctakes.temporal.ae.DocTimeRelAnnotator; +import org.apache.ctakes.temporal.eval.EvaluationOfEventTimeRelations.ParameterSettings; +import org.apache.ctakes.typesystem.type.refsem.Event; +import org.apache.ctakes.typesystem.type.refsem.EventProperties; +import org.apache.ctakes.typesystem.type.textsem.EventMention; +import org.apache.uima.analysis_engine.AnalysisEngineProcessException; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.CASException; +import org.apache.uima.cas.Feature; +import org.apache.uima.collection.CollectionReader; +import org.apache.uima.jcas.JCas; +import org.apache.uima.resource.ResourceInitializationException; +import org.apache.uima.util.CasCopier; +import org.apache.uima.util.FileUtils; +import org.cleartk.eval.AnnotationStatistics; +import org.cleartk.ml.jar.JarClassifierBuilder; +import org.cleartk.ml.liblinear.LibLinearStringOutcomeDataWriter; +import org.cleartk.ml.tksvmlight.model.CompositeKernel.ComboOperator; +import org.cleartk.util.ViewUriUtil; +import org.apache.uima.fit.component.JCasAnnotator_ImplBase; +import org.apache.uima.fit.factory.AggregateBuilder; +import org.apache.uima.fit.factory.AnalysisEngineFactory; +import org.apache.uima.fit.pipeline.JCasIterator; +import org.apache.uima.fit.pipeline.SimplePipeline; +import org.apache.uima.fit.util.JCasUtil; + +import com.google.common.base.Function; +import com.google.common.collect.Maps; +import com.lexicalscope.jewel.cli.CliFactory; +import com.lexicalscope.jewel.cli.Option; + +public class EvaluationOfEventDocTimeRelDeepPhe extends +Evaluation_ImplBase>>{ + static interface TempRelOptions extends Evaluation_ImplBase.Options{ + @Option + public boolean getPrintFormattedRelations(); + + @Option + public boolean getBaseline(); + + @Option + public boolean getClosure(); + + @Option + public boolean getUseTmp(); + + @Option + public boolean getUseGoldAttributes(); + + @Option + public boolean getSkipTrain(); + } + + // protected static ParameterSettings flatParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "linear", + // 10.0, 1.0, "linear", ComboOperator.VECTOR_ONLY, DEFAULT_TK, DEFAULT_LAMBDA); + // protected static ParameterSettings allBagsParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "tk", + // 100.0, 0.1, "radial basis function", ComboOperator.SUM, 0.5, 0.5); + // protected static ParameterSettings ftParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "tk", + // 1.0, 0.1, "radial basis function", ComboOperator.SUM, 0.5, 0.5); + // private static Boolean recallModeEvaluation = true; + protected static boolean DEFAULT_BOTH_DIRECTIONS = false; + protected static float DEFAULT_DOWNSAMPLE = 1.0f; + protected static ParameterSettings allParams = new ParameterSettings(DEFAULT_BOTH_DIRECTIONS, DEFAULT_DOWNSAMPLE, "tk", + 10.0, 1.0, "polynomial", ComboOperator.SUM, 0.1, 0.5); // (0.3, 0.4 for tklibsvm) + private static final String DOC_TIME_REL = "docTimeRel"; + private static final int DISCOVERTY_TYPE = 100; + + public static void main(String[] args) throws Exception { + TempRelOptions options = CliFactory.parseArguments(TempRelOptions.class, args); + List trainItems = Arrays.asList(3, 11, 92, 93 ); + List testItems = Arrays.asList(2, 21); + + // possibleParams.add(defaultParams); + + ParameterSettings params = allParams; + try{ + File workingDir = new File("target/eval/event-properties"); + if(!workingDir.exists()) workingDir.mkdirs(); + if(options.getUseTmp()){ + File tempModelDir = File.createTempFile("temporal", null, workingDir); + tempModelDir.delete(); + tempModelDir.mkdir(); + workingDir = tempModelDir; + } + EvaluationOfEventDocTimeRelDeepPhe evaluation = new EvaluationOfEventDocTimeRelDeepPhe( + workingDir, + options.getRawTextDirectory(), + options.getXMLDirectory(), + options.getXMLFormat(), + options.getSubcorpus(), + options.getXMIDirectory(), + options.getTreebankDirectory(), + options.getClosure(), + options.getPrintFormattedRelations(), + options.getUseGoldAttributes(), + params); + // evaluation.prepareXMIsFor(patientSets); + List training = trainItems; + List testing = testItems; + + evaluation.logClassificationErrors(workingDir, "deepPhe-event-property-errors"); + + //do closure on system, but not on gold, to calculate recall + evaluation.skipTrain = options.getSkipTrain(); + if(!evaluation.skipTrain){ + evaluation.prepareXMIsFor(training); + } + evaluation.prepareXMIsFor(testing); + + Map> stats = null; + + stats = evaluation.trainAndTest(trainItems, testItems);//training + + String name = DOC_TIME_REL; + System.err.println("===================="); + System.err.println(name); + System.err.println("--------------------"); + System.err.println(stats.get(name)); + + + if(options.getUseTmp()){ + // won't work because it's not empty. should we be concerned with this or is it responsibility of + // person invoking the tmp flag? + FileUtils.deleteRecursive(workingDir); + } + }catch(ResourceInitializationException e){ + System.err.println("Error with Initialization"); + e.printStackTrace(); + } + } + + private ParameterSettings params; + protected boolean useClosure; + protected boolean useGoldAttributes; + protected boolean skipTrain=false; + private Map loggers = Maps.newHashMap(); + // protected boolean printRelations = false; + + public EvaluationOfEventDocTimeRelDeepPhe( + File baseDirectory, + File rawTextDirectory, + File xmlDirectory, + XMLFormat xmlFormat, + Subcorpus subcorpus, + File xmiDirectory, + File treebankDirectory, + boolean useClosure, + boolean printErrors, + boolean useGoldAttributes, + ParameterSettings params + ){ + super( + baseDirectory, + rawTextDirectory, + xmlDirectory, + xmlFormat, + subcorpus, + xmiDirectory, + treebankDirectory); + this.useClosure = useClosure; + this.printErrors = printErrors; + this.params = params; + this.useGoldAttributes = useGoldAttributes; + this.loggers.put(DOC_TIME_REL, Logger.getLogger(String.format("%s.%s", this.getClass().getName(), DOC_TIME_REL))); + } + + @Override + protected void train(CollectionReader collectionReader, File directory) throws Exception { + if(this.skipTrain) return; + AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); + aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(KeepEventMentionsCoveredByGoldMentions.class)); + aggregateBuilder.add(DocTimeRelAnnotator.createDataWriterDescription( +// LibSvmStringOutcomeDataWriter.class, + LibLinearStringOutcomeDataWriter.class, + new File(directory, DOC_TIME_REL))); + SimplePipeline.runPipeline(collectionReader, aggregateBuilder.createAggregate()); + String[] optArray; + + if(this.kernelParams == null){ + ArrayList svmOptions = new ArrayList<>(); + svmOptions.add("-c"); svmOptions.add(""+params.svmCost); // svm cost + svmOptions.add("-t"); svmOptions.add(""+params.svmKernelIndex); // kernel index + svmOptions.add("-d"); svmOptions.add("3"); // degree parameter for polynomial + svmOptions.add("-g"); svmOptions.add(""+params.svmGamma); + if(params.svmKernelIndex==ParameterSettings.SVM_KERNELS.indexOf("tk")){ + svmOptions.add("-S"); svmOptions.add(""+params.secondKernelIndex); // second kernel index (similar to -t) for composite kernel + String comboFlag = (params.comboOperator == ComboOperator.SUM ? "+" : params.comboOperator == ComboOperator.PRODUCT ? "*" : params.comboOperator == ComboOperator.TREE_ONLY ? "T" : "V"); + svmOptions.add("-C"); svmOptions.add(comboFlag); + svmOptions.add("-L"); svmOptions.add(""+params.lambda); + svmOptions.add("-T"); svmOptions.add(""+params.tkWeight); + svmOptions.add("-N"); svmOptions.add("3"); // normalize trees and features + } + optArray = svmOptions.toArray(new String[]{}); + }else{ + optArray = this.kernelParams; + for(int i = 0; i < optArray.length; i+=2){ + optArray[i] = "-" + optArray[i]; + } + } + + //calculate class-wise weights: + String[] weightArray=new String[2]; + weightArray[0] = "-c"; + weightArray[1] = optArray[1]; + JarClassifierBuilder.trainAndPackage(new File(directory, DOC_TIME_REL),weightArray); + } + + @SuppressWarnings("deprecation") + @Override + protected Map> test(CollectionReader collectionReader, File directory) + throws Exception { + this.useClosure=false;//don't do closure for test + AggregateBuilder aggregateBuilder = this.getPreprocessorAggregateBuilder(); +// aggregateBuilder.add(CopyFromGold.getDescription(EventMention.class, TimeMention.class)); + aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ReplaceCTakesMentionsWithGoldMentions.class)); + +// aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(ClearEventProperties.class)); + + aggregateBuilder.add(DocTimeRelAnnotator.createAnnotatorDescription(new File(directory, DOC_TIME_REL))); + + aggregateBuilder.add(AnalysisEngineFactory.createEngineDescription(CopyHeadEventDocTimeRel2GoldEvent.class)); + + Function eventMentionToSpan = AnnotationStatistics.annotationToSpan(); + Map> propertyGetters; + propertyGetters = new HashMap<>(); + propertyGetters.put(DOC_TIME_REL, getPropertyGetter(DOC_TIME_REL)); + + Map> statsMap = new HashMap<>(); + + statsMap.put(DOC_TIME_REL, new AnnotationStatistics()); + + for (Iterator casIter = new JCasIterator(collectionReader, aggregateBuilder.createAggregate()); casIter.hasNext();){ + JCas jCas = casIter.next(); + JCas goldView = jCas.getView(GOLD_VIEW_NAME); + JCas systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA); + String text = goldView.getDocumentText(); + + List goldEvents = new ArrayList<>(JCasUtil.select(goldView, EventMention.class)); + List systemEvents = new ArrayList<>(JCasUtil.select(systemView, EventMention.class)); + String name = DOC_TIME_REL; + this.loggers.get(name).fine("Errors in : " + ViewUriUtil.getURI(jCas).toString()); + Function getProperty = propertyGetters.get(name); + statsMap.get(name).add( + goldEvents, + systemEvents, + eventMentionToSpan, + getProperty); + for (int i = 0; i < goldEvents.size(); ++i) { + String goldOutcome = getProperty.apply(goldEvents.get(i)); + String systemOutcome = getProperty.apply(systemEvents.get(i)); + EventMention event = goldEvents.get(i); + int begin = event.getBegin(); + int end = event.getEnd(); + int windowBegin = Math.max(0, begin - 100); + int windowEnd = Math.min(text.length(), end + 100); + if (!goldOutcome.equals(systemOutcome)) { + this.loggers.get(name).fine(String.format( + "%s was %s but should be %s, in ...%s[!%s!:%d-%d]%s...", + name, + systemOutcome, + goldOutcome, + text.substring(windowBegin, begin).replaceAll("[\r\n]", " "), + text.substring(begin, end), + begin, + end, + text.substring(end, windowEnd).replaceAll("[\r\n]", " "))); + }else{//if gold outcome equals system outcome + this.loggers.get(name).fine(String.format( + "%s was correctly labeled as %s, in ...%s[!%s!:%d-%d]%s...", + name, + goldOutcome, + text.substring(windowBegin, begin).replaceAll("[\r\n]", " "), + text.substring(begin, end), + begin, + end, + text.substring(end, windowEnd).replaceAll("[\r\n]", " "))); + } + } + + + } + return statsMap; + } + + public void logClassificationErrors(File outputDir, String outputFilePrefix) throws IOException { + if (!outputDir.exists()) { + outputDir.mkdirs(); + } + String name = DOC_TIME_REL; + Logger logger = this.loggers.get(name); + logger.setLevel(Level.FINE); + File outputFile = new File(outputDir, String.format("%s.%s.log", outputFilePrefix, name)); + FileHandler handler = new FileHandler(outputFile.getPath()); + handler.setFormatter(new Formatter() { + @Override + public String format(LogRecord record) { + return record.getMessage() + '\n'; + } + }); + logger.addHandler(handler); + + } + + private static Function getPropertyGetter(final String propertyName) { + return new Function() { + @Override + public String apply(EventMention eventMention) { + EventProperties eventProperties = eventMention.getEvent().getProperties(); + Feature feature = eventProperties.getType().getFeatureByBaseName(propertyName); + return eventProperties.getFeatureValueAsString(feature); + } + }; + } + + public static class ClearEventProperties extends org.apache.uima.fit.component.JCasAnnotator_ImplBase { + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + for (EventProperties eventProperties : JCasUtil.select(jCas, EventProperties.class)) { + eventProperties.setAspect(null); + eventProperties.setCategory(null); + eventProperties.setContextualAspect(null); + eventProperties.setContextualModality(null); + eventProperties.setDegree(null); + eventProperties.setDocTimeRel(null); + eventProperties.setPermanence(null); + eventProperties.setPolarity(0); + } + } + + } + + /** + * Annotator that removes cTAKES Mentions and Modifiers from the system view, + * and copies over the manually annotated Mentions and Modifiers from the gold + * view. + */ + public static class ReplaceCTakesMentionsWithGoldMentions extends JCasAnnotator_ImplBase { + + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + JCas goldView, systemView; + try { + goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME); + systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA); + } catch (CASException e) { + throw new AnalysisEngineProcessException(e); + } + + // remove cTAKES Mentions and Modifiers from system view +// List cTakesMentions = new ArrayList<>(); +// cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class)); +// for (IdentifiedAnnotation cTakesMention : cTakesMentions) { +// cTakesMention.removeFromIndexes(); +// } + + // copy gold Mentions and Modifiers to the system view + List goldMentions = new ArrayList<>(); + goldMentions.addAll(JCasUtil.select(goldView, EventMention.class)); + CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas()); + for (EventMention goldMention : goldMentions) { + EventMention copy = (EventMention) copier.copyFs(goldMention); + Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa"); + copy.setFeatureValue(sofaFeature, systemView.getSofa()); + copy.setDiscoveryTechnique(DISCOVERTY_TYPE);//mark copied events + copy.addToIndexes(); + } + } + } + + public static class KeepEventMentionsCoveredByGoldMentions extends JCasAnnotator_ImplBase { + + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + JCas goldView, systemView; + try { + goldView = jCas.getView(SHARPXMI.GOLD_VIEW_NAME); + systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA); + } catch (CASException e) { + throw new AnalysisEngineProcessException(e); + } + + // copy gold events to the system view + List goldMentions = new ArrayList<>(); + goldMentions.addAll(JCasUtil.select(goldView, EventMention.class)); + CasCopier copier = new CasCopier(goldView.getCas(), systemView.getCas()); + for (EventMention goldMention : goldMentions) { + //find system Event that is covered by goldEvent: + boolean findCoveredSystemEvent = false; + for(EventMention sysEvent: JCasUtil.selectCovered(systemView, EventMention.class, goldMention.getBegin(), goldMention.getEnd())){ + String goldDocTimeRel = goldMention.getEvent().getProperties().getDocTimeRel(); + sysEvent.setDiscoveryTechnique(DISCOVERTY_TYPE);//mark copied events + findCoveredSystemEvent = true; + if(sysEvent.getEvent()==null){ + Event event = new Event(systemView); + EventProperties props = new EventProperties(systemView); + props.setDocTimeRel(goldDocTimeRel); + event.setProperties(props); + sysEvent.setEvent(event); + }else{ + sysEvent.getEvent().getProperties().setDocTimeRel(goldDocTimeRel); + } + } + + if( !findCoveredSystemEvent ){// if we didn't find covered system event for the given gold event + EventMention copy = (EventMention) copier.copyFs(goldMention); + Feature sofaFeature = copy.getType().getFeatureByBaseName("sofa"); + copy.setFeatureValue(sofaFeature, systemView.getSofa()); + copy.setDiscoveryTechnique(DISCOVERTY_TYPE);//mark copied events + copy.addToIndexes(); + } + } + + //remove non-gold events: + List cTakesMentions = new ArrayList<>(); + cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class)); + for (EventMention aEvent: cTakesMentions){ + if( aEvent.getDiscoveryTechnique() != DISCOVERTY_TYPE){//if this is not an gold event + aEvent.removeFromIndexes(); + } + } + } + } + + /** + * copy covered event's DocTimeRel to the gold event + * remove non-gold eventMentions + */ + public static class CopyHeadEventDocTimeRel2GoldEvent extends JCasAnnotator_ImplBase { + + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + JCas systemView; + try { + systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA); + } catch (CASException e) { + throw new AnalysisEngineProcessException(e); + } + + //build an eventMention-eventMention covered map + Map> coveredMap = + JCasUtil.indexCovered(jCas, EventMention.class, EventMention.class); + + // copy covered event's DocTimeRel to the gold event + for (EventMention aEvent: JCasUtil.select(systemView, EventMention.class)){ + if( aEvent.getDiscoveryTechnique()== DISCOVERTY_TYPE){//if this is an gold event + for(EventMention coveredEvent: coveredMap.get(aEvent)){ + String covDocTimeRel = coveredEvent.getEvent().getProperties().getDocTimeRel(); + aEvent.getEvent().getProperties().setDocTimeRel(covDocTimeRel); + break; + } + } + } + + + List cTakesMentions = new ArrayList<>(); + cTakesMentions.addAll(JCasUtil.select(systemView, EventMention.class)); + for (EventMention aEvent: cTakesMentions){ + if( aEvent.getDiscoveryTechnique() != DISCOVERTY_TYPE){//if this is not an gold event + aEvent.removeFromIndexes(); + } + } + } + } +}