Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0631519F2E for ; Mon, 25 Apr 2016 21:00:46 +0000 (UTC) Received: (qmail 48879 invoked by uid 500); 25 Apr 2016 21:00:46 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 48844 invoked by uid 500); 25 Apr 2016 21:00:45 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 48835 invoked by uid 99); 25 Apr 2016 21:00:45 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd2-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 25 Apr 2016 21:00:45 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd2-us-west.apache.org (ASF Mail Server at spamd2-us-west.apache.org) with ESMTP id 7AE681A1122 for ; Mon, 25 Apr 2016 21:00:45 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd2-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 1.799 X-Spam-Level: * X-Spam-Status: No, score=1.799 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-0.001] autolearn=disabled Received: from mx1-lw-eu.apache.org ([10.40.0.8]) by localhost (spamd2-us-west.apache.org [10.40.0.9]) (amavisd-new, port 10024) with ESMTP id dsa2jF3o_Rhd for ; Mon, 25 Apr 2016 21:00:43 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx1-lw-eu.apache.org (ASF Mail Server at mx1-lw-eu.apache.org) with ESMTP id 6B8865F39B for ; Mon, 25 Apr 2016 21:00:42 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 6C8F9E0483 for ; Mon, 25 Apr 2016 21:00:41 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 6D0EE3A0113 for ; Mon, 25 Apr 2016 21:00:41 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1740899 - in /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor: ae/LocationOfRelationExtractorAnnotator.java eval/RelationExtractorEvaluation.java Date: Mon, 25 Apr 2016 21:00:41 -0000 To: commits@ctakes.apache.org From: clin@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160425210041.6D0EE3A0113@svn01-us-west.apache.org> Author: clin Date: Mon Apr 25 21:00:41 2016 New Revision: 1740899 URL: http://svn.apache.org/viewvc?rev=1740899&view=rev Log: add back event-expansion for training location-of model: to expand use parameter: --expand-events Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java?rev=1740899&r1=1740898&r2=1740899&view=diff ============================================================================== --- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java (original) +++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/LocationOfRelationExtractorAnnotator.java Mon Apr 25 21:00:41 2016 @@ -19,12 +19,16 @@ package org.apache.ctakes.relationextractor.ae; import java.util.ArrayList; +import java.util.Collection; import java.util.List; +import java.util.Map; +import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation; import org.apache.ctakes.typesystem.type.relation.BinaryTextRelation; import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation; import org.apache.ctakes.typesystem.type.relation.RelationArgument; import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention; +import org.apache.ctakes.typesystem.type.textsem.EntityMention; import org.apache.ctakes.typesystem.type.textsem.EventMention; import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; import org.apache.ctakes.typesystem.type.textspan.Sentence; @@ -55,17 +59,63 @@ public class LocationOfRelationExtractor List pairs = new ArrayList<>(); - for (EventMention event : events) { + if(RelationExtractorEvaluation.expandEvent){//if expand + Map> coveredMap = + JCasUtil.indexCovered(identifiedAnnotationView, EventMention.class, EventMention.class); + Map> coveringMap = + JCasUtil.indexCovering(identifiedAnnotationView, EventMention.class, EventMention.class); + Map> siteEventMap = + JCasUtil.indexCovered(identifiedAnnotationView, AnatomicalSiteMention.class, EventMention.class); + Map> siteEntityMap = + JCasUtil.indexCovering(identifiedAnnotationView, AnatomicalSiteMention.class, EntityMention.class); + + final List eventList = new ArrayList<>(); + for (EventMention event : events) { + eventList.addAll(coveringMap.get(event)); + eventList.addAll(coveredMap.get(event)); + for(IdentifiedAnnotation covEvent : eventList){ + for (AnatomicalSiteMention site : sites) { + if(!hasOverlap(covEvent,site)){ + pairs.add(new IdentifiedAnnotationPair(covEvent, site)); + } + } + } + eventList.clear(); for (AnatomicalSiteMention site : sites) { pairs.add(new IdentifiedAnnotationPair(event, site)); + eventList.addAll(siteEventMap.get(site)); + eventList.addAll(siteEntityMap.get(site)); + for(IdentifiedAnnotation covSite : eventList){ + if(!hasOverlap(event,covSite)){ + pairs.add(new IdentifiedAnnotationPair(event, covSite)); + } + } + eventList.clear(); } + + } + }else{//id don't expand + for (EventMention event : events) { + for (AnatomicalSiteMention site : sites) { + pairs.add(new IdentifiedAnnotationPair(event, site)); + } + } } return pairs; } - + private static boolean hasOverlap(Annotation event1, Annotation event2) { + if(event1.getEnd()>=event2.getBegin()&&event1.getEnd()<=event2.getEnd()){ + return true; + } + if(event2.getEnd()>=event1.getBegin()&&event2.getEnd()<=event1.getEnd()){ + return true; + } + return false; + } + @Override protected void createRelation( JCas jCas, Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java?rev=1740899&r1=1740898&r2=1740899&view=diff ============================================================================== --- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java (original) +++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/eval/RelationExtractorEvaluation.java Mon Apr 25 21:00:41 2016 @@ -50,6 +50,7 @@ import org.apache.ctakes.typesystem.type import org.apache.ctakes.typesystem.type.relation.ManagesTreatsTextRelation; import org.apache.ctakes.typesystem.type.relation.ManifestationOfTextRelation; import org.apache.ctakes.typesystem.type.relation.RelationArgument; +import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention; import org.apache.ctakes.typesystem.type.textsem.EntityMention; import org.apache.ctakes.typesystem.type.textsem.EventMention; import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation; @@ -127,6 +128,11 @@ public class RelationExtractorEvaluation longName = "class-weights", description = "automatically set class-wise weights for inbalanced training data") public boolean getClassWeights(); + + @Option( + longName = "expand-events", + description = "expand events to their covering or covered events") + public boolean getExpandEvents(); } @@ -227,7 +233,8 @@ public class RelationExtractorEvaluation options.getAllowSmallerSystemArguments(), options.getIgnoreImpossibleGoldRelations(), options.getPrintErrors(), - options.getClassWeights()); + options.getClassWeights(), + options.getExpandEvents()); } }); } @@ -250,6 +257,8 @@ public class RelationExtractorEvaluation private boolean setClassWeights; private static PrintWriter outPrint; + + public static boolean expandEvent = false; /** * An evaluation of a relation extractor. @@ -282,7 +291,8 @@ public class RelationExtractorEvaluation boolean allowSmallerSystemArguments, boolean ignoreImpossibleGoldRelations, boolean printErrors, - boolean setClassWeights) { + boolean setClassWeights, + boolean expandEventParameter) { super(baseDirectory); this.relationClass = relationClass; this.classifierAnnotatorClass = classifierAnnotatorClass; @@ -292,6 +302,7 @@ public class RelationExtractorEvaluation this.ignoreImpossibleGoldRelations = ignoreImpossibleGoldRelations; this.printErrors = printErrors; this.setClassWeights = setClassWeights; + expandEvent = expandEventParameter; } public RelationExtractorEvaluation( @@ -308,6 +319,7 @@ public class RelationExtractorEvaluation false, false, false, + false, false); } @@ -325,6 +337,10 @@ public class RelationExtractorEvaluation // in the gold relations builder.add(AnalysisEngineFactory.createEngineDescription(RemoveCTakesMentionsAndCopyGoldRelations.class)); + //add potential events for training: + if (expandEvent && this.relationClass.getSimpleName().equals("LocationOfTextRelation") ) + builder.add(AnalysisEngineFactory.createEngineDescription(AddPotentialRelations.class)); + // add the relation extractor, configured for training mode AnalysisEngineDescription classifierAnnotator = AnalysisEngineFactory.createEngineDescription( @@ -366,6 +382,94 @@ public class RelationExtractorEvaluation } } + public static class AddPotentialRelations extends JCasAnnotator_ImplBase { + @Override + public void process(JCas jCas) throws AnalysisEngineProcessException { + JCas relationView = jCas; + + Map> coveredMap = + JCasUtil.indexCovered(relationView, EventMention.class, EventMention.class); + Map> coveringMap = + JCasUtil.indexCovering(relationView, EventMention.class, EventMention.class); + Map> siteEventMap = + JCasUtil.indexCovered(relationView, AnatomicalSiteMention.class, EventMention.class); + Map> siteEntityMap = + JCasUtil.indexCovering(relationView, AnatomicalSiteMention.class, EntityMention.class); + final List eventList = new ArrayList<>(); + for(LocationOfTextRelation relation : Lists.newArrayList(JCasUtil.select(relationView, LocationOfTextRelation.class))){ + Annotation arg1 = relation.getArg1().getArgument(); + Annotation arg2 = relation.getArg2().getArgument(); + EventMention event = null; + if(arg1 instanceof EventMention && arg2 instanceof AnatomicalSiteMention){ + event = (EventMention) arg1; + + eventList.addAll(coveringMap.get(event)); + eventList.addAll(coveredMap.get(event)); + for(IdentifiedAnnotation covEvent : eventList){ + if(!covEvent.getClass().equals(EventMention.class) && !hasOverlap(covEvent, arg2)){ + createRelation(relationView, covEvent, arg2, relation.getCategory()); + } + } + eventList.clear(); + eventList.addAll(siteEventMap.get(arg2)); + eventList.addAll(siteEntityMap.get(arg2)); + for(IdentifiedAnnotation covSite : eventList){ + if(!covSite.getClass().equals(EventMention.class) && !hasOverlap(arg1, covSite)){ + createRelation(relationView, event, covSite, relation.getCategory()); + } + } + eventList.clear(); + }else if(arg2 instanceof EventMention && arg1 instanceof AnatomicalSiteMention){ + event = (EventMention) arg2; + eventList.addAll(coveringMap.get(event)); + eventList.addAll(coveredMap.get(event)); + for(IdentifiedAnnotation covEvent : eventList){ + if(!covEvent.getClass().equals(EventMention.class)&& !hasOverlap(arg1, covEvent)){ + createRelation(relationView, arg1, covEvent, relation.getCategory()); + } + } + eventList.clear(); + eventList.addAll(siteEventMap.get(arg1)); + eventList.addAll(siteEntityMap.get(arg1)); + for(IdentifiedAnnotation covSite : eventList){ + if(!covSite.getClass().equals(EventMention.class) && !hasOverlap(covSite, arg2)){ + createRelation(relationView, covSite, event, relation.getCategory()); + } + } + eventList.clear(); + } + } + + } + + private static boolean hasOverlap(Annotation event1, Annotation event2) { + if(event1.getEnd()>=event2.getBegin()&&event1.getEnd()<=event2.getEnd()){ + return true; + } + if(event2.getEnd()>=event1.getBegin()&&event2.getEnd()<=event1.getEnd()){ + return true; + } + return false; + } + + private static void createRelation(JCas jCas, Annotation arg1, + Annotation arg2, String category) { + RelationArgument relArg1 = new RelationArgument(jCas); + relArg1.setArgument(arg1); + relArg1.setRole("Arg1"); + relArg1.addToIndexes(); + RelationArgument relArg2 = new RelationArgument(jCas); + relArg2.setArgument(arg2); + relArg2.setRole("Arg2"); + relArg2.addToIndexes(); + BinaryTextRelation relation = new BinaryTextRelation(jCas); + relation.setArg1(relArg1); + relation.setArg2(relArg2); + relation.setCategory(category); + relation.addToIndexes(); + + } + } @Override protected AnnotationStatistics test(CollectionReader collectionReader, File directory)