ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1696982 - in /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features: StringMatchingFeatureExtractor.java TokenFeatureExtractor.java cluster/MentionClusterAgreementFeaturesExtractor.java
Date Fri, 21 Aug 2015 13:41:32 GMT
Author: tmill
Date: Fri Aug 21 13:41:32 2015
New Revision: 1696982

URL: http://svn.apache.org/r1696982
Log:
Added some methods for computing agreement for various semantic features (negation, timex,
etc.)

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java?rev=1696982&r1=1696981&r2=1696982&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
Fri Aug 21 13:41:32 2015
@@ -116,4 +116,22 @@ public class StringMatchingFeatureExtrac
 	  
 	  return false;
 	}
+	
+	/**
+	 * Heuristically decides whether a mention sits inside a quotation: true when a
+	 * double-quote character occurs between the start of the mention's line and the
+	 * start of the mention. Not foolproof (an already-closed quote on the same line
+	 * still counts) but probably pretty accurate.
+	 * @param jcas CAS whose document text is searched
+	 * @param a    annotation (mention) to test
+	 * @return true if a '"' precedes the mention on the mention's own line
+	 */
+	public static boolean inQuote(JCas jcas, Annotation a){
+	  String docText = jcas.getDocumentText();
+	  // lastIndexOf yields -1 when no newline precedes the mention, so +1 gives offset 0.
+	  int lineStart = docText.lastIndexOf('\n', a.getBegin()) + 1;
+	  // indexOf yields -1 when absent; a hit must also fall before the mention begins.
+	  int firstQuote = docText.indexOf('"', lineStart);
+	  return firstQuote >= 0 && firstQuote < a.getBegin();
+	}
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java?rev=1696982&r1=1696981&r2=1696982&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
Fri Aug 21 13:41:32 2015
@@ -5,6 +5,7 @@ import java.util.List;
 
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.constants.CONST;
 import org.apache.ctakes.typesystem.type.syntax.BaseToken;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
@@ -143,4 +144,8 @@ public class TokenFeatureExtractor imple
 	public static boolean isTitle(String s1){
 	  return s1.startsWith("dr.") || s1.startsWith("mr.") || s1.startsWith("mrs.") || s1.startsWith("ms.");
 	}
+	
+	/**
+	 * Tells whether a mention is marked as negated.
+	 * @param mention annotation whose polarity is inspected
+	 * @return true when the polarity equals CONST.NE_POLARITY_NEGATION_PRESENT
+	 */
+	public static boolean isNegated(IdentifiedAnnotation mention){
+	  final int polarity = mention.getPolarity();
+	  return CONST.NE_POLARITY_NEGATION_PRESENT == polarity;
+	}
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1696982&r1=1696981&r2=1696982&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
Fri Aug 21 13:41:32 2015
@@ -3,29 +3,27 @@ package org.apache.ctakes.coreference.ae
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.getGender;
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isDefinite;
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isDemonstrative;
+import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.isNegated;
 import static org.apache.ctakes.coreference.ae.features.TokenFeatureExtractor.numberSingular;
 
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.ctakes.core.util.ListIterable;
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
-import org.apache.ctakes.typesystem.type.refsem.UmlsConcept;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
-import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
 import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.Markable;
 import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
 import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.ctakes.typesystem.type.textsem.TimeMention;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.cleartk.ml.Feature;
 
 public class MentionClusterAgreementFeaturesExtractor implements RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation>
{
@@ -48,14 +46,22 @@ public class MentionClusterAgreementFeat
     boolean singular = numberSingular(jCas, mention, s);
     features.add(new Feature("MC_MENTION_NUMBER", singular));
     
+    boolean mentionNegated = isNegated(mention);
+    features.add(new Feature("MC_MENTION_NEGATED", mentionNegated));
+    
+    boolean mentionTimex = isTimex(mention);
+    features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
+    
     boolean matchDem = false;
     boolean matchDef = false;
     boolean matchGender = false;
     boolean matchNumber = false;
+    boolean matchNeg = true;
+    boolean clusterTimex = false;  // if any cluster member is timex
     
     for(IdentifiedAnnotation member : new ListIterable<IdentifiedAnnotation>(cluster.getMembers())){
       if(member == null){
-        System.err.println("Something that shouldn't happen has happened");
+        System.err.println("Found an empty cluster member in agreement features extractor.");
         continue;
       }else if(mention.getBegin() < member.getEnd()){
         // during training this might happen -- see a member of a cluster that
@@ -75,12 +81,20 @@ public class MentionClusterAgreementFeat
       if(!matchNumber && numberSingular(jCas, member, m) == singular){
         matchNumber = true;
       }
+      if(mentionNegated != isNegated(member)){
+        matchNeg = false;
+      }
+      if(isTimex(member)){
+        clusterTimex = true;
+      }
     }
     
     features.add(new Feature("MC_AGREE_DEM", matchDem));
     features.add(new Feature("MC_AGREE_DEF", matchDef));
     features.add(new Feature("MC_AGREE_GEN", matchGender));
     features.add(new Feature("MC_AGREE_NUM", matchNumber));
+    features.add(new Feature("MC_AGREE_NEG", matchNeg));
+    features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
     
     /// check attributes like location/degree/negation/uncertainty
     /*
@@ -139,4 +153,8 @@ public class MentionClusterAgreementFeat
     }
     return rel;
   }
+  
+  private boolean isTimex(Annotation a){
+    return JCasUtil.selectCovered(TimeMention.class, a).size() > 0;
+  }
 }



Mime
View raw message