Return-Path: X-Original-To: apmail-ctakes-commits-archive@www.apache.org Delivered-To: apmail-ctakes-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 2B0E2C72A for ; Fri, 7 Jun 2013 15:51:57 +0000 (UTC) Received: (qmail 85145 invoked by uid 500); 7 Jun 2013 15:51:57 -0000 Delivered-To: apmail-ctakes-commits-archive@ctakes.apache.org Received: (qmail 85109 invoked by uid 500); 7 Jun 2013 15:51:55 -0000 Mailing-List: contact commits-help@ctakes.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ctakes.apache.org Delivered-To: mailing list commits@ctakes.apache.org Received: (qmail 85099 invoked by uid 99); 7 Jun 2013 15:51:54 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Jun 2013 15:51:54 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Jun 2013 15:51:50 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id D05AE23888D2; Fri, 7 Jun 2013 15:51:29 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1490695 - in /ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature: DependencyParseUtils.java DependencyPathFeaturesExtractor.java Date: Fri, 07 Jun 2013 15:51:29 -0000 To: commits@ctakes.apache.org From: dligach@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130607155129.D05AE23888D2@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: dligach Date: Fri Jun 7 15:51:29 2013 New Revision: 1490695 URL: http://svn.apache.org/r1490695 Log: modified the dependency path feature to exclude the words on both ends of the path Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java?rev=1490695&r1=1490694&r2=1490695&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyParseUtils.java Fri Jun 7 15:51:29 2013 @@ -18,136 +18,139 @@ */ package org.apache.ctakes.temporal.ae.feature; -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -import org.apache.uima.jcas.JCas; -import org.apache.uima.jcas.tcas.Annotation; -import org.uimafit.util.JCasUtil; - -import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode; - +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode; +import org.apache.uima.jcas.JCas; +import org.apache.uima.jcas.tcas.Annotation; +import org.uimafit.util.JCasUtil; + +/** + * This is a slightly modified version of the same class from relation extraction. + * TODO: eventually replace the relation extraction version with this one. + */ public class DependencyParseUtils { - /** - * Returns the paths from each node to the common ancestor between them - */ - public static List> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) { - List> paths = new ArrayList>(2); - LinkedList node1ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node1); - LinkedList node2ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node2); - - // We will remove the last item in each path until they diverge - ConllDependencyNode ancestor = null; - while (!node1ToHeadPath.isEmpty() && !node2ToHeadPath.isEmpty()) { - if (node1ToHeadPath.getLast() == node2ToHeadPath.getLast()) { - node1ToHeadPath.removeLast(); - ancestor = node2ToHeadPath.removeLast(); - } else { - break; - } - } - - // Put the common ancestor back on both paths - if (ancestor != null) { - node1ToHeadPath.add(ancestor); - node2ToHeadPath.add(ancestor); - } - - paths.add(node1ToHeadPath); - paths.add(node2ToHeadPath); - return paths; - } - - /** - * Finds the head word within a given annotation span - */ - public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) { - - for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) { - - ConllDependencyNode head = depNode.getHead(); - if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) { - // The head is outside the bounds of the annotation, so this node must be the annotation's head - return depNode; - } - } - // Can this happen? - return null; - } - - public static LinkedList getPathToSentenceHead(ConllDependencyNode node) { - LinkedList path = new LinkedList(); - ConllDependencyNode currNode = node; - while (currNode.getHead() != null) { - path.add(currNode); - currNode = currNode.getHead(); - } - return path; - } - - /** - * Finds the path between two dependency nodes - */ - public static LinkedList getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) { - LinkedList path = new LinkedList(); - List> paths = getPathsToCommonAncestor(srcNode, tgtNode); - LinkedList srcToAncestorPath = paths.get(0); - LinkedList tgtToAncestorPath = paths.get(1); - - if (srcNode == tgtNode) { - return path; - } - - // Join the two paths - if (!srcToAncestorPath.isEmpty()) { - srcToAncestorPath.removeLast(); - } - path = srcToAncestorPath; - while (!tgtToAncestorPath.isEmpty()) { - path.add(tgtToAncestorPath.removeLast()); - } - - return path; - } - - - /** - * This will convert a path into a string lexicalized at the end points with arc labels and POS tags in between - */ - - public static String pathToString(LinkedList path) { - - StringBuilder builder = new StringBuilder(); - for (ConllDependencyNode node : path) { - if (node == path.getFirst() || node == path.getLast()) { - builder.append(node.getCoveredText()); - } else { - builder.append(node.getPostag()); - } - - builder.append("-"); - builder.append(node.getDeprel()); - if (node != path.getLast()) { - builder.append("/"); - } - } - return builder.toString(); - } - - - - - public static String dumpDependencyRelations(JCas jcas, Annotation annotation) { - StringBuilder builder = new StringBuilder(); - for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) { - if (depNode.getHead() != null) { - builder.append(String.format("%s(%s,%s)\n", depNode.getDeprel(), depNode.getCoveredText(), depNode.getHead().getCoveredText())); - } - } - return builder.toString(); - - } - + /** + * Returns the paths from each node to the common ancestor between them + */ + public static List> getPathsToCommonAncestor(ConllDependencyNode node1, ConllDependencyNode node2) { + + List> paths = new ArrayList>(2); + LinkedList node1ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node1); + LinkedList node2ToHeadPath = DependencyParseUtils.getPathToSentenceHead(node2); + + // We will remove the last item in each path until they diverge + ConllDependencyNode ancestor = null; + while (!node1ToHeadPath.isEmpty() && !node2ToHeadPath.isEmpty()) { + if (node1ToHeadPath.getLast() == node2ToHeadPath.getLast()) { + node1ToHeadPath.removeLast(); + ancestor = node2ToHeadPath.removeLast(); + } else { + break; + } + } + + // Put the common ancestor back on both paths + if (ancestor != null) { + node1ToHeadPath.add(ancestor); + node2ToHeadPath.add(ancestor); + } + + paths.add(node1ToHeadPath); + paths.add(node2ToHeadPath); + + return paths; + } + + /** + * Finds the head word within a given annotation span + */ + public static ConllDependencyNode findAnnotationHead(JCas jcas, Annotation annotation) { + + for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) { + + ConllDependencyNode head = depNode.getHead(); + if (head == null || head.getEnd() <= annotation.getBegin() || head.getBegin() > annotation.getEnd()) { + // The head is outside the bounds of the annotation, so this node must be the annotation's head + return depNode; + } + } + + // Can this happen? + return null; + } + + public static LinkedList getPathToSentenceHead(ConllDependencyNode node) { + + LinkedList path = new LinkedList(); + ConllDependencyNode currNode = node; + while (currNode.getHead() != null) { + path.add(currNode); + currNode = currNode.getHead(); + } + + return path; + } + + /** + * Finds the path between two dependency nodes + */ + public static LinkedList getPathBetweenNodes(ConllDependencyNode srcNode, ConllDependencyNode tgtNode) { + + LinkedList path = new LinkedList(); + List> paths = getPathsToCommonAncestor(srcNode, tgtNode); + LinkedList srcToAncestorPath = paths.get(0); + LinkedList tgtToAncestorPath = paths.get(1); + + if (srcNode == tgtNode) { + return path; + } + + // Join the two paths + if (!srcToAncestorPath.isEmpty()) { + srcToAncestorPath.removeLast(); + } + path = srcToAncestorPath; + while (!tgtToAncestorPath.isEmpty()) { + path.add(tgtToAncestorPath.removeLast()); + } + + return path; + } + + + /** + * This will convert a path into a string lexicalized at the end points with arc labels and POS tags in between + */ + public static String pathToString(LinkedList path) { + + StringBuilder builder = new StringBuilder(); + for (ConllDependencyNode node : path) { + if (node != path.getFirst() && node != path.getLast()) { + builder.append(node.getPostag()); + builder.append("-"); + } + builder.append(node.getDeprel()); + if (node != path.getLast()) { + builder.append("/"); + } + } + + return builder.toString(); + } + + public static String dumpDependencyRelations(JCas jcas, Annotation annotation) { + + StringBuilder builder = new StringBuilder(); + for (ConllDependencyNode depNode : JCasUtil.selectCovered(jcas, ConllDependencyNode.class, annotation)) { + if (depNode.getHead() != null) { + builder.append(String.format("%s(%s,%s)\n", depNode.getDeprel(), depNode.getCoveredText(), depNode.getHead().getCoveredText())); + } + } + + return builder.toString(); + } } Modified: ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java?rev=1490695&r1=1490694&r2=1490695&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java (original) +++ ctakes/trunk/ctakes-temporal/src/main/java/org/apache/ctakes/temporal/ae/feature/DependencyPathFeaturesExtractor.java Fri Jun 7 15:51:29 2013 @@ -30,30 +30,24 @@ import org.apache.uima.jcas.JCas; import org.cleartk.classifier.Feature; public class DependencyPathFeaturesExtractor implements RelationFeaturesExtractor { - - - @Override - public List extract(JCas jCas, IdentifiedAnnotation arg1, - IdentifiedAnnotation arg2) throws AnalysisEngineProcessException { - - List features = new ArrayList(); - - ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1); - ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2); - if (node1 == null || node2 == null) { return features; } - - List> paths = DependencyParseUtils.getPathsToCommonAncestor(node1, node2); - LinkedList path1 = paths.get(0); - LinkedList path2 = paths.get(1); - - features.add(new Feature("DEPENDENCY_PATH_MEAN_DISTANCE_TO_COMMON_ANCESTOR", (path1.size() + path2.size()) / 2.0)); - features.add(new Feature("DEPENDENCY_PATH_MAX_DISTANCE_TO_COMMON_ANCESTOR", Math.max(path1.size(), path2.size()))); - features.add(new Feature("DEPENDENCY_PATH_MIN_DISTANCE_TO_COMMON_ANCESTOR", Math.min(path1.size(), path2.size()))); - - LinkedList node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2); - features.add(new Feature("DEPENDENCY_PATH", DependencyParseUtils.pathToString(node1ToNode2Path))); - - return features; - } + + @Override + public List extract(JCas jCas, IdentifiedAnnotation arg1, + IdentifiedAnnotation arg2) throws AnalysisEngineProcessException { + + List features = new ArrayList(); + + ConllDependencyNode node1 = DependencyParseUtils.findAnnotationHead(jCas, arg1); + ConllDependencyNode node2 = DependencyParseUtils.findAnnotationHead(jCas, arg2); + if (node1 == null || node2 == null) + { + return features; + } + + LinkedList node1ToNode2Path = DependencyParseUtils.getPathBetweenNodes(node1, node2); + features.add(new Feature("dependency_path", DependencyParseUtils.pathToString(node1ToNode2Path))); + + return features; + } }