ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1713835 - in /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae: ./ features/cluster/
Date Wed, 11 Nov 2015 13:18:52 GMT
Author: tmill
Date: Wed Nov 11 13:18:52 2015
New Revision: 1713835

URL: http://svn.apache.org/viewvc?rev=1713835&view=rev
Log:
Some fixes to ranking annotator.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java?rev=1713835&r1=1713834&r2=1713835&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java Wed Nov 11 13:18:52 2015
@@ -147,15 +147,10 @@ public class MentionClusterRankingCorefe
     int sentDist = 5;
     // using linked hash set ensures no duplicates:
     LinkedHashSet<CollectionTextRelationIdentifiedAnnotationPair> pairs = new LinkedHashSet<>();
-//    if(mention.getCoveredText().equalsIgnoreCase("this")){
-//      pairs.addAll(getSentenceDistancePairs(jcas, mention, 1));
-//      pairs.addAll(getClusterPairs(jcas, mention, 3));
-//    }else{
-      pairs.addAll(getSentenceDistancePairs(jcas, mention, sentDist));
-      pairs.addAll(getSectionHeaderPairs(jcas, mention, sentDist));
-      pairs.addAll(getClusterPairs(jcas, mention, Integer.MAX_VALUE));
-      pairs.addAll(getHeadwordMatchPairs(jcas, mention, sentDist));
-//    }
+    pairs.addAll(getSentenceDistancePairs(jcas, mention, sentDist));
+    pairs.addAll(getSectionHeaderPairs(jcas, mention, sentDist));
+    pairs.addAll(getClusterPairs(jcas, mention, Integer.MAX_VALUE));
+    pairs.addAll(getHeadwordMatchPairs(jcas, mention, sentDist));
     
     return pairs;
   }
@@ -378,13 +373,19 @@ public class MentionClusterRankingCorefe
         ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
         String mentionText = mention.getCoveredText().toLowerCase();
         boolean singleton = true;
-        double maxScore = 0.0;
+        double maxScore = Double.NEGATIVE_INFINITY;
         CollectionTextRelation maxCluster = null;
-        
+        List<Feature> mentionFeatures = new ArrayList<>();
+        for(FeatureExtractor1<Markable> extractor : this.mentionExtractors){
+          mentionFeatures.addAll(extractor.extract(jCas, mention));
+        }
+
         for(CollectionTextRelationIdentifiedAnnotationPair pair : this.getCandidateRelationArgumentPairs(jCas, mention)){
           CollectionTextRelation cluster = pair.getCluster();
           // apply all the feature extractors to extract the list of features
           List<Feature> features = new ArrayList<>();
+          features.addAll(mentionFeatures);
+          
           for (RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation> extractor : this.relationExtractors) {
             List<Feature> feats = extractor.extract(jCas, cluster, mention);
             if (feats != null){
@@ -393,9 +394,6 @@ public class MentionClusterRankingCorefe
             }
           }
           
-          for(FeatureExtractor1<Markable> extractor : this.mentionExtractors){
-            features.addAll(extractor.extract(jCas, mention));
-          }
           
           // here is where feature conjunctions can go (dupFeatures)
           List<Feature> dupFeatures = new ArrayList<>();
@@ -407,15 +405,17 @@ public class MentionClusterRankingCorefe
               System.err.println(message);
               //            throw new IllegalArgumentException(String.format(message, feature, features));
             }else{
-              String prefix = "";
-              if(mentionText.equals("it") || mentionText.equals("this") || mentionText.equals("that")){
-                prefix = "PRO_"+mentionText;
-              }else if(headNode != null && headNode.getPostag() != null){
-                prefix = headNode.getPostag();                
-              }else{
-                prefix = "UNK";
+              String prefix = null;
+//              if(mentionText.equals("it") || mentionText.equals("this") || mentionText.equals("that")){
+//                prefix = "PRO_"+mentionText;
+//              }else if(headNode != null && headNode.getPostag() != null){
+//                prefix = headNode.getPostag();                
+//              }else{
+//                prefix = "UNK";
+//              }
+              if(prefix != null){
+                dupFeatures.add(new Feature(prefix+"_"+feature.getName(), feature.getValue()));
               }
-              dupFeatures.add(new Feature(prefix+"_"+feature.getName(), feature.getValue()));
             }
           }
           features.addAll(dupFeatures);    
@@ -439,6 +439,7 @@ public class MentionClusterRankingCorefe
             inst.setOutcome(outVal);
             this.dataWriter.write(inst);
             if(!category.equals(NO_RELATION_CATEGORY)){
+              singleton = false;
               break;
             }
           }
@@ -464,19 +465,32 @@ public class MentionClusterRankingCorefe
           headWordMarkables.get(head).add(mention);
         }
         
-        // if we got this far and never matched up the
-        if(maxScore > 0){
-          createRelation(jCas, maxCluster, mention, CLUSTER_RELATION_CATEGORY);
+        if(this.isTraining()){
+          // write a dummy link with only mention features:
+          QidInstance<Double> inst = new QidInstance<>();
+          inst.setQid(String.valueOf(qid));
+          inst.addAll(mentionFeatures);
+          if(singleton){
+            inst.setOutcome(1.0);
+          }else{
+            inst.setOutcome(0.0);
+          }
+          this.dataWriter.write(inst);
         }else{
-          // make the markable it's own cluster:
-          CollectionTextRelation chain = new CollectionTextRelation(jCas);
-          NonEmptyFSList list = new NonEmptyFSList(jCas);
-          list.setHead(mention);
-          list.setTail(new EmptyFSList(jCas));
-          chain.setMembers(list);
-          chain.addToIndexes();
-          list.addToIndexes();
-          list.getTail().addToIndexes();
+          Double nullPrediction = this.classify(mentionFeatures);
+          if(nullPrediction > maxScore){
+            // make the markable its own cluster:
+            CollectionTextRelation chain = new CollectionTextRelation(jCas);
+            NonEmptyFSList list = new NonEmptyFSList(jCas);
+            list.setHead(mention);
+            list.setTail(new EmptyFSList(jCas));
+            chain.setMembers(list);
+            chain.addToIndexes();
+            list.addToIndexes();
+            list.getTail().addToIndexes();
+          }else{
+            createRelation(jCas, maxCluster, mention, CLUSTER_RELATION_CATEGORY);
+          }
         }
         qid++;
       }

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java?rev=1713835&r1=1713834&r2=1713835&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java Wed Nov 11 13:18:52 2015
@@ -63,13 +63,15 @@ public class MentionClusterMentionFeatur
     // token features:
     feats.addAll(tokenContext.extract(view, focusAnnotation));
     
-//    feats.add(new Feature("NumCoveredTokens_" + JCasUtil.selectCovered(BaseToken.class, focusAnnotation).size()));
+    feats.add(new Feature("NumCoveredTokens_" + JCasUtil.selectCovered(BaseToken.class, focusAnnotation).size()));
     
     // pos features:
-//    feats.addAll(mentionFeaturesExtractor.extract(view, focusAnnotation));
+    feats.addAll(mentionFeaturesExtractor.extract(view, focusAnnotation));
     
-//    feats.addAll(DependencyTreeFeaturesExtractor.extractForNode(view, focusAnnotation, "dep"));
+    feats.addAll(DependencyTreeFeaturesExtractor.extractForNode(view, focusAnnotation, "dep"));
     
+    feats.add(new Feature("MC_MENTION_SALIENCE", focusAnnotation.getConfidence()));
+
     return feats;
   }
 

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java?rev=1713835&r1=1713834&r2=1713835&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java (original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterSalienceFeaturesExtractor.java Wed Nov 11 13:18:52 2015
@@ -20,8 +20,6 @@ public class MentionClusterSalienceFeatu
       IdentifiedAnnotation mention) throws AnalysisEngineProcessException {
     List<Feature> feats = new ArrayList<>();
     
-    feats.add(new Feature("MC_MENTION_SALIENCE", mention.getConfidence()));
-
     double maxSalience = 0.0;
     double totalSalience = 0.0;
     double productSalience = 1.0;



Mime
View raw message