ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dlig...@apache.org
Subject svn commit: r1505739 - /ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/IdentifiedAnnotationExpander.java
Date Mon, 22 Jul 2013 17:37:17 GMT
Author: dligach
Date: Mon Jul 22 17:37:17 2013
New Revision: 1505739

URL: http://svn.apache.org/r1505739
Log:
added a version of NP expander that simply returns the text of the enclosing NP

Modified:
    ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/IdentifiedAnnotationExpander.java

Modified: ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/IdentifiedAnnotationExpander.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/IdentifiedAnnotationExpander.java?rev=1505739&r1=1505738&r2=1505739&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/IdentifiedAnnotationExpander.java (original)
+++ ctakes/trunk/ctakes-relation-extractor/src/main/java/org/apache/ctakes/relationextractor/ae/IdentifiedAnnotationExpander.java Mon Jul 22 17:37:17 2013
@@ -68,4 +68,39 @@ public class IdentifiedAnnotationExpande
 
     return originalSpan;
   }
+  
+  public static String getEnclosingNP(JCas jCas, IdentifiedAnnotation identifiedAnnotation)
{
+
+    // map each covering treebank node to its character length
+    Map<TreebankNode, Integer> treebankNodeSizes = new HashMap<TreebankNode, Integer>();
+    for(TreebankNode treebankNode : JCasUtil.selectCovering(
+        jCas, 
+        TreebankNode.class, 
+        identifiedAnnotation.getBegin(), 
+        identifiedAnnotation.getEnd())) {
+
+      // only expand nouns (and not verbs or adjectives)
+      if(treebankNode instanceof TerminalTreebankNode) {
+        if(! treebankNode.getNodeType().startsWith("N")) {
+          return identifiedAnnotation.getCoveredText();
+        }
+      }
+
+      // because only nouns are expanded, look for covering NPs
+      if(treebankNode.getNodeType().equals("NP")) {
+        treebankNodeSizes.put(treebankNode, treebankNode.getCoveredText().length());
+      }
+    }
+
+    // find the shortest covering treebank node
+    List<TreebankNode> sortedTreebankNodes = new ArrayList<TreebankNode>(treebankNodeSizes.keySet());
+    Function<TreebankNode, Integer> getValue = Functions.forMap(treebankNodeSizes);
+    Collections.sort(sortedTreebankNodes, Ordering.natural().onResultOf(getValue));
+
+    if(sortedTreebankNodes.size() > 0) {
+      return sortedTreebankNodes.get(0).getCoveredText();
+    } 
+
+    return identifiedAnnotation.getCoveredText();
+  }
 }



Mime
View raw message