ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1596002 - in /ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors: AssertionAboveLeftTreeExtractor.java AssertionDependencyTreeExtractor.java DependencyWordsFragmentExtractor.java
Date Mon, 19 May 2014 19:32:52 GMT
Author: tmill
Date: Mon May 19 19:32:52 2014
New Revision: 1596002

URL: http://svn.apache.org/r1596002
Log:
CTAKES-297: Added syntactic feature extractors for assertion module.

Added:
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionAboveLeftTreeExtractor.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionDependencyTreeExtractor.java
    ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyWordsFragmentExtractor.java

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionAboveLeftTreeExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionAboveLeftTreeExtractor.java?rev=1596002&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionAboveLeftTreeExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionAboveLeftTreeExtractor.java Mon May 19 19:32:52 2014
@@ -0,0 +1,38 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import static org.apache.ctakes.assertion.util.AssertionTreeUtils.extractAboveLeftConceptTree;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.TreeFeature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.util.CleartkInitializationException;
+
+/**
+ * Feature extractor that produces a single tree feature, named
+ * "TK_AboveLeftTree", for an annotation. The tree itself is built by
+ * {@code AssertionTreeUtils.extractAboveLeftConceptTree} and is stored in the
+ * feature as its string form.
+ */
+public class AssertionAboveLeftTreeExtractor implements SimpleFeatureExtractor {
+  /** Path handed to FileLocator to obtain the input for {@link #sems}. */
+  private static final String SEMANTIC_CLASSES_PATH = "org/apache/ctakes/assertion/all_cues.txt";
+
+  /** Semantic classes passed to the tree-building utility on every call to extract. */
+  protected SemanticClasses sems = null;
+
+  /**
+   * Creates the extractor and loads the semantic classes from the stream that
+   * FileLocator returns for the resource path.
+   *
+   * @throws CleartkInitializationException wrapping any exception raised while
+   *         the semantic classes are being loaded
+   */
+  public AssertionAboveLeftTreeExtractor() throws CleartkInitializationException{
+    try{
+      sems = new SemanticClasses(FileLocator.getAsStream(SEMANTIC_CLASSES_PATH));
+    }catch(Exception cause){
+      throw new CleartkInitializationException(cause, "org/apache/ctakes/assertion/semantic_classes", "Could not find semantic classes resource.", new Object[]{});
+    }
+  }
+
+  /**
+   * Builds the tree for the given annotation and wraps its string form in a
+   * tree feature.
+   *
+   * @param jcas the CAS the annotation belongs to
+   * @param annotation the annotation to build the tree for
+   * @return a new mutable list holding exactly one "TK_AboveLeftTree" feature
+   */
+  @Override
+  public List<Feature> extract(JCas jcas, Annotation annotation)
+      throws CleartkExtractorException {
+    List<Feature> result = new ArrayList<Feature>();
+    SimpleTree conceptTree = extractAboveLeftConceptTree(jcas, annotation, sems);
+    String treeText = conceptTree.toString();
+    result.add(new TreeFeature("TK_AboveLeftTree", treeText));
+    return result;
+  }
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionDependencyTreeExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionDependencyTreeExtractor.java?rev=1596002&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionDependencyTreeExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/AssertionDependencyTreeExtractor.java Mon May 19 19:32:52 2014
@@ -0,0 +1,67 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.util.List;
+
+import org.apache.ctakes.assertion.pipelines.GenerateDependencyRepresentation;
+import org.apache.ctakes.assertion.util.AssertionDepUtils;
+import org.apache.ctakes.assertion.util.AssertionTreeUtils;
+import org.apache.ctakes.assertion.util.SemanticClasses;
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.classifier.TreeFeature;
+import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
+import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
+import org.cleartk.util.CleartkInitializationException;
+import org.uimafit.util.JCasUtil;
+
+import com.google.common.collect.Lists;
+
+/**
+ * Feature extractor that produces a single tree feature, named "TK_DW", from
+ * the dependency nodes of the first sentence that covers the annotation.
+ * When no covering sentence or no tree is available, a fixed placeholder
+ * string is used as the feature value instead.
+ */
+public class AssertionDependencyTreeExtractor implements SimpleFeatureExtractor {
+  /** Feature value used when there is no covering sentence or no tree. */
+  private static final String NO_PARSE_TREE = "(S (no parse))";
+
+  /** Semantic classes substituted into the dependency tree before it is stringified. */
+  protected SemanticClasses sems = null;
+
+  /**
+   * Creates the extractor and loads the semantic classes from the stream that
+   * FileLocator returns for "org/apache/ctakes/assertion/all_cues.txt".
+   *
+   * @throws CleartkInitializationException wrapping any exception raised while
+   *         the semantic classes are being loaded
+   */
+  public AssertionDependencyTreeExtractor() throws CleartkInitializationException {
+    try{
+      sems = new SemanticClasses(FileLocator.getAsStream("org/apache/ctakes/assertion/all_cues.txt"));
+    }catch(Exception cause){
+      throw new CleartkInitializationException(cause, "org/apache/ctakes/assertion/semantic_classes", "Could not find semantic classes resource.", new Object[]{});
+    }
+  }
+
+  /**
+   * Wraps the dependency-tree string for the annotation in a tree feature.
+   *
+   * @param jCas the CAS the annotation belongs to
+   * @param arg1 the annotation to extract the feature for
+   * @return a new mutable list holding exactly one "TK_DW" feature
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, Annotation arg1)
+      throws CleartkExtractorException {
+    List<Feature> feats = Lists.newArrayList();
+    String treeText = buildTreeString(jCas, arg1);
+    feats.add(new TreeFeature("TK_DW", treeText));
+    return feats;
+  }
+
+  /**
+   * Computes the feature value: the string form of the token tree after its
+   * dependency words have been replaced with semantic classes, or the
+   * placeholder when the sentence or the tree is missing.
+   */
+  private String buildTreeString(JCas jCas, Annotation target)
+      throws CleartkExtractorException {
+    List<Sentence> covering = JCasUtil.selectCovering(jCas, Sentence.class, target.getBegin(), target.getEnd());
+    if(covering == null || covering.isEmpty()){
+      return NO_PARSE_TREE;
+    }
+
+    // Only the first covering sentence is considered.
+    Sentence first = covering.get(0);
+    List<ConllDependencyNode> depNodes = JCasUtil.selectCovered(ConllDependencyNode.class, first);
+    SimpleTree depTree = AssertionDepUtils.getTokenTreeString(jCas, depNodes, target, GenerateDependencyRepresentation.UP_NODES);
+    if(depTree == null){
+      return NO_PARSE_TREE;
+    }
+
+    AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses(depTree, sems);
+    return depTree.toString();
+  }
+
+}

Added: ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyWordsFragmentExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyWordsFragmentExtractor.java?rev=1596002&view=auto
==============================================================================
--- ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyWordsFragmentExtractor.java (added)
+++ ctakes/trunk/ctakes-assertion/src/main/java/org/apache/ctakes/assertion/medfacts/cleartk/extractors/DependencyWordsFragmentExtractor.java Mon May 19 19:32:52 2014
@@ -0,0 +1,50 @@
+package org.apache.ctakes.assertion.medfacts.cleartk.extractors;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.ctakes.assertion.pipelines.GenerateDependencyRepresentation;
+import org.apache.ctakes.assertion.util.AssertionDepUtils;
+import org.apache.ctakes.assertion.util.AssertionTreeUtils;
+import org.apache.ctakes.constituency.parser.util.TreeUtils;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.utils.tree.SimpleTree;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.classifier.Feature;
+import org.cleartk.util.CleartkInitializationException;
+import org.uimafit.util.JCasUtil;
+
+/**
+ * Fragment-based feature extractor over dependency trees. For every fragment
+ * in the inherited {@code frags} collection that is contained (ignoring case)
+ * in the mention's dependency tree, one feature named
+ * {@code "TreeFrag_" + prefix} is emitted with the fragment's string form as
+ * its value.
+ */
+public class DependencyWordsFragmentExtractor extends TreeFragmentFeatureExtractor {
+
+  /**
+   * Passes both arguments straight through to the superclass constructor.
+   *
+   * @param prefix forwarded to TreeFragmentFeatureExtractor
+   * @param fragsPath forwarded to TreeFragmentFeatureExtractor
+   * @throws CleartkInitializationException if the superclass constructor throws it
+   */
+  public DependencyWordsFragmentExtractor(String prefix, String fragsPath) throws CleartkInitializationException {
+    super(prefix, fragsPath);
+  }
+
+  /**
+   * Collects one feature per matching fragment for the given mention.
+   *
+   * @param jCas the CAS the mention belongs to
+   * @param mention the annotation to extract features for
+   * @return a new mutable list of matching-fragment features; empty when no
+   *         sentence covers the mention, when no tree could be built, or when
+   *         no fragment matches
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, Annotation mention) {
+    List<Feature> matches = new ArrayList<Feature>();
+
+    List<Sentence> covering = JCasUtil.selectCovering(jCas, Sentence.class, mention.getBegin(), mention.getEnd());
+    if(covering == null || covering.isEmpty()){
+      return matches;
+    }
+
+    // Only the first covering sentence is considered.
+    Sentence first = covering.get(0);
+    List<ConllDependencyNode> depNodes = JCasUtil.selectCovered(ConllDependencyNode.class, first);
+
+    SimpleTree depTree = AssertionDepUtils.getTokenTreeString(jCas, depNodes, mention, GenerateDependencyRepresentation.UP_NODES);
+    if(depTree == null){
+      System.err.println("Tree is null!");
+      return matches;
+    }
+
+    AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses(depTree, sems);
+    String featureName = "TreeFrag_" + prefix;
+    for(SimpleTree candidate : frags){
+      if(TreeUtils.containsDepFragIgnoreCase(depTree, candidate)){
+        matches.add(new Feature(featureName, candidate.toString()));
+      }
+    }
+    return matches;
+  }
+}
\ No newline at end of file



Mime
View raw message