ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1715313 - in /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae: MentionClusterCoreferenceAnnotator.java features/TokenFeatureExtractor.java features/cluster/MentionClusterAgreementFeaturesExtractor.java
Date Fri, 20 Nov 2015 02:14:39 GMT
Author: tmill
Date: Fri Nov 20 02:14:39 2015
New Revision: 1715313

URL: http://svn.apache.org/viewvc?rev=1715313&view=rev
Log:
New features to move negation into a mention feature and add an uncertainty feature.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1715313&r1=1715312&r2=1715313&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Fri Nov 20 02:14:39 2015
@@ -14,9 +14,9 @@ import java.util.Set;
 
 import org.apache.ctakes.core.util.ListFactory;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAgreementFeaturesExtractor;
+import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAttributeAgreementFeaturesExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDepHeadExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDistSemExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDistanceFeaturesExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterMentionFeaturesExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSalienceFeaturesExtractor;
 import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSectionFeaturesExtractor;
@@ -118,6 +118,7 @@ public class MentionClusterCoreferenceAn
     extractors.add(new MentionClusterDepHeadExtractor());
     extractors.add(new MentionClusterStackFeaturesExtractor());
     extractors.add(new MentionClusterSalienceFeaturesExtractor());
+    extractors.add(new MentionClusterAttributeAgreementFeaturesExtractor());
 //    extractors.add(new MentionClusterDistanceFeaturesExtractor());
     
     try {
@@ -134,6 +135,7 @@ public class MentionClusterCoreferenceAn
     List<FeatureExtractor1<Markable>> extractors = new ArrayList<>();
     // mention features from pairwise system:
     extractors.add(new MentionClusterMentionFeaturesExtractor());
+    extractors.add(new MentionClusterAttributeAgreementFeaturesExtractor());
 
     return extractors;
   }
@@ -144,15 +146,10 @@ public class MentionClusterCoreferenceAn
     int sentDist = 5;
     // using linked hash set ensures no duplicates:
     LinkedHashSet<CollectionTextRelationIdentifiedAnnotationPair> pairs = new LinkedHashSet<>();
-//    if(mention.getCoveredText().equalsIgnoreCase("this")){
-//      pairs.addAll(getSentenceDistancePairs(jcas, mention, 1));
-//      pairs.addAll(getClusterPairs(jcas, mention, 3));
-//    }else{
-      pairs.addAll(getSentenceDistancePairs(jcas, mention, sentDist));
-      pairs.addAll(getSectionHeaderPairs(jcas, mention, sentDist));
-      pairs.addAll(getClusterPairs(jcas, mention, Integer.MAX_VALUE));
-      pairs.addAll(getHeadwordMatchPairs(jcas, mention, sentDist));
-//    }
+    pairs.addAll(getSentenceDistancePairs(jcas, mention, sentDist));
+    pairs.addAll(getSectionHeaderPairs(jcas, mention, sentDist));
+    pairs.addAll(getClusterPairs(jcas, mention, Integer.MAX_VALUE));
+    pairs.addAll(getHeadwordMatchPairs(jcas, mention, sentDist));
     
     return pairs;
   }
@@ -373,7 +370,6 @@ public class MentionClusterCoreferenceAn
     for(Segment segment : JCasUtil.select(jCas, Segment.class)){
       for(Markable mention : JCasUtil.selectCovered(jCas, Markable.class, segment)){
         ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
-        String mentionText = mention.getCoveredText().toLowerCase();
         boolean singleton = true;
         double maxScore = 0.0;
         CollectionTextRelation maxCluster = null;
@@ -389,7 +385,7 @@ public class MentionClusterCoreferenceAn
               features.addAll(feats);
             }
           }
-          
+                 
           for(FeatureExtractor1<Markable> extractor : this.mentionExtractors){
             features.addAll(extractor.extract(jCas, mention));
           }
@@ -424,17 +420,20 @@ public class MentionClusterCoreferenceAn
 */
               
               // UMLS semantic type feature conjunctions
+              /*
               for(Feature feat : features){
                 if(feat.getName().startsWith("ClusterSemType")){
                   dupFeatures.add(new Feature(feat.getName()+"_"+feature.getName(), feature.getValue()));
                 }
               }
+              */
               
               if(prefix != null){
                 dupFeatures.add(new Feature(prefix+"_"+feature.getName(), feature.getValue()));
               }
-            }
+            }            
           }
+          
           features.addAll(dupFeatures);
           
           

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java?rev=1715313&r1=1715312&r2=1715313&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/TokenFeatureExtractor.java Fri Nov 20 02:14:39 2015
@@ -148,4 +148,8 @@ public class TokenFeatureExtractor imple
 	public static boolean isNegated(IdentifiedAnnotation mention){
 	  return mention.getPolarity() == CONST.NE_POLARITY_NEGATION_PRESENT;
 	}
+	
+	public static boolean isUncertain(IdentifiedAnnotation mention){
+	  return mention.getUncertainty() == CONST.NE_UNCERTAINTY_PRESENT;
+	}
 }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java?rev=1715313&r1=1715312&r2=1715313&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterAgreementFeaturesExtractor.java Fri Nov 20 02:14:39 2015
@@ -45,10 +45,7 @@ public class MentionClusterAgreementFeat
 
     boolean singular = numberSingular(jCas, mention, s);
     features.add(new Feature("MC_MENTION_NUMBER", singular));
-    
-    boolean mentionNegated = isNegated(mention);
-    features.add(new Feature("MC_MENTION_NEGATED", mentionNegated));
-    
+        
     boolean mentionTimex = isTimex(mention);
     features.add(new Feature("MC_MENTION_TIMEX", mentionTimex));
     
@@ -56,7 +53,6 @@ public class MentionClusterAgreementFeat
     boolean matchDef = false;
     boolean matchGender = false;
     boolean matchNumber = false;
-    boolean matchNeg = true;
     boolean clusterTimex = false;  // if any cluster member is timex
     
     for(IdentifiedAnnotation member : new ListIterable<IdentifiedAnnotation>(cluster.getMembers())){
@@ -81,9 +77,6 @@ public class MentionClusterAgreementFeat
       if(!matchNumber && numberSingular(jCas, member, m) == singular){
         matchNumber = true;
       }
-      if(mentionNegated != isNegated(member)){
-        matchNeg = false;
-      }
       if(isTimex(member)){
         clusterTimex = true;
       }
@@ -93,7 +86,6 @@ public class MentionClusterAgreementFeat
     features.add(new Feature("MC_AGREE_DEF", matchDef));
     features.add(new Feature("MC_AGREE_GEN", matchGender));
     features.add(new Feature("MC_AGREE_NUM", matchNumber));
-    features.add(new Feature("MC_AGREE_NEG", matchNeg));
     features.add(new Feature("MC_AGREE_TIMEX", clusterTimex == mentionTimex));
     
     /// check attributes like location/degree/negation/uncertainty



Mime
View raw message