Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B71E11748C for ; Tue, 19 May 2015 21:30:44 +0000 (UTC) Received: (qmail 57172 invoked by uid 500); 19 May 2015 21:30:44 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 57141 invoked by uid 500); 19 May 2015 21:30:44 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 57132 invoked by uid 99); 19 May 2015 21:30:44 -0000 Received: from eris.apache.org (HELO hades.apache.org) (140.211.11.105) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 19 May 2015 21:30:44 +0000 Received: from hades.apache.org (localhost [127.0.0.1]) by hades.apache.org (ASF Mail Server at hades.apache.org) with ESMTP id 5EF62AC012B for ; Tue, 19 May 2015 21:30:44 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1680396 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java Date: Tue, 19 May 2015 21:30:44 -0000 To: commits@ctakes.apache.org From: tmill@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20150519213044.5EF62AC012B@hades.apache.org> Author: tmill Date: Tue May 19 21:30:43 2015 New Revision: 1680396 URL: http://svn.apache.org/r1680396 Log: Added time expressions to markables, allow whole sentence to be markable (if it's an NP), a few other edge cases. Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java?rev=1680396&r1=1680395&r2=1680396&view=diff ============================================================================== --- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java (original) +++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java Tue May 19 21:30:43 2015 @@ -10,11 +10,11 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.ctakes.dependency.parser.util.DependencyUtility; -import org.apache.ctakes.temporal.eval.THYMEData; import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode; import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode; import org.apache.ctakes.typesystem.type.syntax.TreebankNode; import org.apache.ctakes.typesystem.type.textsem.Markable; +import org.apache.ctakes.typesystem.type.textsem.TimeMention; import org.apache.ctakes.typesystem.type.textspan.Segment; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; @@ -40,6 +40,19 @@ public class DeterministicMarkableAnnota // createMarkablesUsingConstituencyTrees(jCas); createMarkablesUsingDependencyTrees(jCas); + for(TimeMention timex : JCasUtil.select(jCas, TimeMention.class)){ + boolean collision = false; + for(Markable other : JCasUtil.selectCovered(jCas, Markable.class, timex.getBegin(), timex.getEnd())){ + if(other.getBegin() == timex.getBegin() && other.getEnd() == timex.getEnd()){ + collision = true; + break; + } + } + if(!collision){ + Markable m = new Markable(jCas, timex.getBegin(), timex.getEnd()); + m.addToIndexes(jCas); + } + } } private static void createMarkablesUsingDependencyTrees(JCas jCas) { @@ -65,7 +78,7 @@ public class DeterministicMarkableAnnota } int begin = node.getBegin(); int end = node.getEnd(); - if(node.getHead().getId() != 0){ +// if(node.getHead().getId() != 0){ List progeny = getProgeny(node, getDependencyNodes(jCas, getSentence(jCas, node))); progeny = removeConjunctionNodes(node, progeny); if(progeny.size() > 0){ @@ -78,13 +91,14 @@ public class DeterministicMarkableAnnota } } } - } +// } Markable markable = new Markable(jCas, begin, end); markable.addToIndexes(); }else if(node.getPostag().equals("DT") && !node.getDeprel().equals("det")){ Markable markable = new Markable(jCas, node.getBegin(), node.getEnd()); markable.addToIndexes(); - }else if(node.getCoveredText().toLowerCase().equals("it") && !node.getDeprel().contains("pass")){ + }else if(node.getCoveredText().toLowerCase().equals("it") && node.getDeprel().contains("bj")){ + // contains "bj" includes nsubj, all the obj's, and all the *bjpass*'s. Markable markable = new Markable(jCas, node.getBegin(), node.getEnd()); markable.addToIndexes(); } @@ -102,7 +116,7 @@ public class DeterministicMarkableAnnota boolean blockedByConj = false; for(ConllDependencyNode pathEl : DependencyUtility.getPath(progeny, node, originalNode)){ if(pathEl == originalNode) continue; - if(pathEl.getDeprel().equals("conj") || pathEl.getDeprel().equals("cc") || pathEl.getPostag().equals(".") || pathEl.getPostag().equals(",")){ + if(pathEl.getDeprel().equals("conj") || pathEl.getDeprel().equals("cc") || pathEl.getPostag().equals(".") || pathEl.getPostag().equals(",") || pathEl.getDeprel().equals("meta")){ blockedByConj = true; break; }