ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1680388 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
Date Tue, 19 May 2015 20:56:04 GMT
Author: tmill
Date: Tue May 19 20:56:04 2015
New Revision: 1680388

URL: http://svn.apache.org/r1680388
Log:
Feature for agreement of location of text relation.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1680388&r1=1680387&r2=1680388&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java Tue May 19 20:56:04 2015
@@ -6,13 +6,25 @@ import static org.apache.ctakes.corefere
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.numberSingular;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
+import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
+import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
+import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
+import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.cleartk.ml.Feature;
 
@@ -22,6 +34,8 @@ public class MentionClusterAgreementFeat
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
     List<Feature> features = new ArrayList<>();
     
+    ConllDependencyNode mentionHead = DependencyUtility.getNominalHeadNode(jCas, mention);
+    
     String s = mention.getCoveredText().toLowerCase();
     boolean isDem = isDemonstrative(s);
     boolean isDef = isDefinite(s);
@@ -68,6 +82,61 @@ public class MentionClusterAgreementFeat
     features.add(new Feature("MC_AGREE_GEN", matchGender));
     features.add(new Feature("MC_AGREE_NUM", matchNumber));
     
+    /// check attributes like location/degree/negation/uncertainty
+    /*
+    Set<String> mentionSites = new HashSet<>();
+    
+    
+    if(mentionHead != null){
+      for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, mentionHead)){
+        LocationOfTextRelation rel = getLocation(annot);
+        if(rel != null){
+          AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
+          for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
+            mentionSites.add(concept.getCui());
+          }
+        }
+      }
+    }
+
+    if(mentionSites.size() > 0){
+      Set<String> memberSites = new HashSet<>();
+      for(Markable member : JCasUtil.select(cluster.getMembers(), Markable.class)){
+        if(mention.getBegin() <= member.getBegin()) break;
+        ConllDependencyNode memberHead = DependencyUtility.getNominalHeadNode(jCas, member);
+        if(memberHead == null) continue;
+        
+        for(IdentifiedAnnotation annot : JCasUtil.selectCovering(jCas, IdentifiedAnnotation.class, memberHead)){
+          LocationOfTextRelation rel = getLocation(annot);
+          if(rel != null){
+            boolean conflict = true;
+            AnatomicalSiteMention site = (AnatomicalSiteMention)rel.getArg2().getArgument();
+            for(UmlsConcept concept : JCasUtil.select(site.getOntologyConceptArr(), UmlsConcept.class)){
+              memberSites.add(concept.getCui());
+              if(mentionSites.contains(concept.getCui())){
+                conflict = false;
+              }
+            }
+            if(conflict){
+              features.add(new Feature("MC_LOCATION_CONFLICT", true));
+            }
+          }
+        }
+      }
+    }
+    */
     return features;
   }
+  
+  private LocationOfTextRelation getLocation(IdentifiedAnnotation annot){
+    LocationOfTextRelation rel = null;
+    if(annot instanceof ProcedureMention){
+      rel = ((ProcedureMention)annot).getBodyLocation();
+    }else if(annot instanceof DiseaseDisorderMention){
+      rel = ((DiseaseDisorderMention)annot).getBodyLocation();
+    }else if(annot instanceof SignSymptomMention){
+      rel = ((SignSymptomMention)annot).getBodyLocation();
+    }
+    return rel;
+  }
 }



Mime
View raw message