ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1683183 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
Date Tue, 02 Jun 2015 20:49:36 GMT
Author: tmill
Date: Tue Jun  2 20:49:35 2015
New Revision: 1683183

URL: http://svn.apache.org/r1683183
Log:
Improvement to markable detection -- if head is inside the markable trim it.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java?rev=1683183&r1=1683182&r2=1683183&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/DeterministicMarkableAnnotator.java Tue Jun  2 20:49:35 2015
@@ -10,6 +10,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
 import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
@@ -92,6 +93,20 @@ public class DeterministicMarkableAnnota
               }
             }
 //          }
+          ConllDependencyNode parent = node.getHead();
+          if(parent != null && parent.getId() != 0){ 
+            // if parent is inside the bounds of the proposed markable prune it a bit.
+            if(parent.getBegin() < node.getBegin() && parent.getBegin() > begin){
+              // get the following token:
+              BaseToken nextToken = JCasUtil.selectFollowing(BaseToken.class, parent, 1).get(0);
+              begin = nextToken.getBegin();              
+            }
+            // parent is after the current head node but before the proposed markable is meant to end:
+            if(parent.getEnd() >  node.getEnd() && parent.getEnd() < end){
+              BaseToken prevToken = JCasUtil.selectPreceding(BaseToken.class, parent, 1).get(0);
+              end = prevToken.getEnd();
+            }
+          }
           Markable markable = new Markable(jCas, begin, end);
           markable.addToIndexes();
         }else if(node.getPostag().equals("DT") && !node.getDeprel().equals("det")){



Mime
View raw message