Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 43E6D200B87 for ; Mon, 19 Sep 2016 23:01:07 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 4286D160ACC; Mon, 19 Sep 2016 21:01:07 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 5FA17160ABB for ; Mon, 19 Sep 2016 23:01:06 +0200 (CEST) Received: (qmail 73960 invoked by uid 500); 19 Sep 2016 21:01:05 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 73950 invoked by uid 99); 19 Sep 2016 21:01:05 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 19 Sep 2016 21:01:05 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id 339EFCA523 for ; Mon, 19 Sep 2016 21:01:05 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: -0.124 X-Spam-Level: X-Spam-Status: No, score=-0.124 tagged_above=-999 required=6.31 tests=[KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-1.124] autolearn=disabled Received: from mx2-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id B6hh8ij-P6XV for ; Mon, 19 Sep 2016 21:01:04 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx2-lw-us.apache.org (ASF Mail Server at mx2-lw-us.apache.org) with ESMTP id E3D965F1EC for ; 
Mon, 19 Sep 2016 21:01:03 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 3358DE0230 for ; Mon, 19 Sep 2016 21:01:03 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 62D893A039C for ; Mon, 19 Sep 2016 21:01:02 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1761503 - /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPrinter.java Date: Mon, 19 Sep 2016 21:01:02 -0000 To: commits@ctakes.apache.org From: dligach@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160919210102.62D893A039C@svn01-us-west.apache.org> archived-at: Mon, 19 Sep 2016 21:01:07 -0000 Author: dligach Date: Mon Sep 19 21:01:01 2016 New Revision: 1761503 URL: http://svn.apache.org/viewvc?rev=1761503&view=rev Log: using a single relation snippet printer now instead of two separate ones Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPrinter.java Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPrinter.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPrinter.java?rev=1761503&r1=1761502&r2=1761503&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPrinter.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/nn/data/EventTimeRelPrinter.java Mon Sep 19 21:01:01 2016 @@ -50,7 +50,6 @@ import org.apache.uima.fit.pipeline.Simp import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; 
import org.apache.uima.jcas.tcas.Annotation; -import org.cleartk.util.ViewUriUtil; import com.lexicalscope.jewel.cli.CliFactory; import com.lexicalscope.jewel.cli.Option; @@ -106,7 +105,9 @@ public class EventTimeRelPrinter { // write training data to file CollectionReader trainCollectionReader = Utils.getCollectionReader(trainFiles); AnalysisEngine trainDataWriter = AnalysisEngineFactory.createEngine( - TrainRelationSnippetPrinter.class, + RelationSnippetPrinter.class, + "IsTraining", + true, "OutputFile", trainFile.getAbsoluteFile()); SimplePipeline.runPipeline(trainCollectionReader, trainDataWriter); @@ -114,7 +115,9 @@ public class EventTimeRelPrinter { // write dev data to file CollectionReader devCollectionReader = Utils.getCollectionReader(devFiles); AnalysisEngine devDataWriter = AnalysisEngineFactory.createEngine( - TestRelationSnippetPrinter.class, + RelationSnippetPrinter.class, + "IsTraining", + false, "OutputFile", devFile.getAbsolutePath()); SimplePipeline.runPipeline(devCollectionReader, devDataWriter); @@ -122,12 +125,16 @@ public class EventTimeRelPrinter { /** * Print gold standard relations and their context. 
- * - * @author dmitriy dligach */ - public static class TrainRelationSnippetPrinter extends JCasAnnotator_ImplBase { + public static class RelationSnippetPrinter extends JCasAnnotator_ImplBase { @ConfigurationParameter( + name = "IsTraining", + mandatory = true, + description = "are we training?") + private boolean isTraining; + + @ConfigurationParameter( name = "OutputFile", mandatory = true, description = "path to the output file") @@ -150,22 +157,29 @@ public class EventTimeRelPrinter { throw new AnalysisEngineProcessException(e); } - // can't iterate over binary text relations in a sentence, so need - // a lookup from pair of annotations to binary text relation + // can't iterate over binary text relations in a sentence, so need a lookup Map<List<Annotation>, BinaryTextRelation> relationLookup = new HashMap<>(); - for(BinaryTextRelation relation : JCasUtil.select(goldView, TemporalTextRelation.class)) { - Annotation arg1 = relation.getArg1().getArgument(); - Annotation arg2 = relation.getArg2().getArgument(); - - if(relationLookup.get(Arrays.asList(arg1, arg2)) != null) { - // there is already a relation between arg1 and arg2 - // only store if it is 'contains' relation - if(relation.getCategory().equals("CONTAINS")) { - relationLookup.put(Arrays.asList(arg1, arg2), relation); + if(isTraining) { + for(BinaryTextRelation relation : JCasUtil.select(goldView, TemporalTextRelation.class)) { + Annotation arg1 = relation.getArg1().getArgument(); + Annotation arg2 = relation.getArg2().getArgument(); + + if(relationLookup.get(Arrays.asList(arg1, arg2)) != null) { + // there is already a relation between arg1 and arg2 + // only store if it is 'contains' relation + if(relation.getCategory().equals("CONTAINS")) { + relationLookup.put(Arrays.asList(arg1, arg2), relation); + } else { + System.out.println("skipping relation: " + arg1.getCoveredText() + " ... " + arg2.getCoveredText()); + } } else { - System.out.println("skipping relation: " + arg1.getCoveredText() + " ... 
" + arg2.getCoveredText()); + relationLookup.put(Arrays.asList(arg1, arg2), relation); } - } else { + } + } else { + for(BinaryTextRelation relation : JCasUtil.select(goldView, TemporalTextRelation.class)) { + Annotation arg1 = relation.getArg1().getArgument(); + Annotation arg2 = relation.getArg2().getArgument(); relationLookup.put(Arrays.asList(arg1, arg2), relation); } } @@ -195,92 +209,6 @@ public class EventTimeRelPrinter { } String context; - if(time.getBegin() < event.getBegin()) { - // ... time ... event ... scenario - context = getTokenContext(systemView, sentence, time, "t", event, "e", 2); - } else { - // ... event ... time ... scenario - context = getTokenContext(systemView, sentence, event, "e", time, "t", 2); - } - - String text = String.format("%s|%s", label, context); - eventTimeRelationsInSentence.add(text.toLowerCase()); - } - } - - try { - Files.write(Paths.get(outputFile), eventTimeRelationsInSentence, StandardOpenOption.APPEND); - } catch (IOException e) { - e.printStackTrace(); - } - } - } - } - - /** - * Print gold standard relations and their context. 
- * - * @author dmitriy dligach - */ - public static class TestRelationSnippetPrinter extends JCasAnnotator_ImplBase { - - @ConfigurationParameter( - name = "OutputFile", - mandatory = true, - description = "path to the output file") - private String outputFile; - - @Override - public void process(JCas jCas) throws AnalysisEngineProcessException { - - JCas goldView; - try { - goldView = jCas.getView("GoldView"); - } catch (CASException e) { - throw new AnalysisEngineProcessException(e); - } - - JCas systemView; - try { - systemView = jCas.getView("_InitialView"); - } catch (CASException e) { - throw new AnalysisEngineProcessException(e); - } - - // can't iterate over binary text relations in a sentence, so need - // a lookup from pair of annotations to binary text relation - Map<List<Annotation>, BinaryTextRelation> relationLookup = new HashMap<>(); - for(BinaryTextRelation relation : JCasUtil.select(goldView, TemporalTextRelation.class)) { - Annotation arg1 = relation.getArg1().getArgument(); - Annotation arg2 = relation.getArg2().getArgument(); - relationLookup.put(Arrays.asList(arg1, arg2), relation); - } - - // go over sentences, extracting event-event relation instances - for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) { - List<String> eventTimeRelationsInSentence = new ArrayList<>(); - - // retrieve event-time relations in this sentence - for(EventMention event : JCasUtil.selectCovered(goldView, EventMention.class, sentence)) { - for(TimeMention time : JCasUtil.selectCovered(goldView, TimeMention.class, sentence)) { - - BinaryTextRelation timeEventRelation = relationLookup.get(Arrays.asList(time, event)); - BinaryTextRelation eventTimeRelation = relationLookup.get(Arrays.asList(event, time)); - - // TODO: am I capturing multiple relations here? probably not... 
- String label = "none"; - if(timeEventRelation != null) { - if(timeEventRelation.getCategory().equals("CONTAINS")) { - label = "contains"; // this is contains - } - } - if(eventTimeRelation != null) { - if(eventTimeRelation.getCategory().equals("CONTAINS")) { - label = "contains-1"; // this is contains - } - } - - String context; if(time.getBegin() < event.getBegin()) { // ... time ... event ... scenario context = getTokenContext(systemView, sentence, time, "t", event, "e", 2);