ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From seanfi...@apache.org
Subject svn commit: r1807533 - in /ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference: ae/ ae/pairing/cluster/ util/
Date Wed, 06 Sep 2017 21:44:13 GMT
Author: seanfinan
Date: Wed Sep  6 21:44:12 2017
New Revision: 1807533

URL: http://svn.apache.org/viewvc?rev=1807533&view=rev
Log:
added parameter to select single or multiple document coref
some refactoring to get rid of duplicate code

Added:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java
Modified:
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterPairer.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ExactStringPairer.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/HeadwordPairer.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SectionHeaderPairer.java
    ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SentenceDistancePairer.java

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterCoreferenceAnnotator.java Wed Sep  6 21:44:12 2017
@@ -1,58 +1,23 @@
 package org.apache.ctakes.coreference.ae;
 
-import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.stream.Collectors;
-import java.util.stream.IntStream;
-
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
-import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
+import org.apache.ctakes.core.util.DotLogger;
 import org.apache.ctakes.core.util.ListFactory;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAgreementFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAttributeFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDepHeadExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSalienceFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSectionFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSemTypeDepPrefsFeatureExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterStackFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterStringFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterUMLSFeatureExtractor;
-import org.apache.ctakes.coreference.ae.pairing.cluster.ClusterMentionPairer_ImplBase;
-import org.apache.ctakes.coreference.ae.pairing.cluster.ClusterPairer;
-import org.apache.ctakes.coreference.ae.pairing.cluster.HeadwordPairer;
-import org.apache.ctakes.coreference.ae.pairing.cluster.SectionHeaderPairer;
-import org.apache.ctakes.coreference.ae.pairing.cluster.SentenceDistancePairer;
+import org.apache.ctakes.coreference.ae.features.cluster.*;
+import org.apache.ctakes.coreference.ae.pairing.cluster.*;
+import org.apache.ctakes.coreference.util.ClusterMentionFetcher;
 import org.apache.ctakes.coreference.util.MarkableUtilities;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
 import org.apache.ctakes.relationextractor.eval.RelationExtractorEvaluation.HashableArguments;
 import org.apache.ctakes.temporal.utils.PatientViewsUtil;
-import org.apache.ctakes.typesystem.type.refsem.AnatomicalSite;
-import org.apache.ctakes.typesystem.type.refsem.DiseaseDisorder;
-import org.apache.ctakes.typesystem.type.refsem.Element;
-import org.apache.ctakes.typesystem.type.refsem.Event;
-import org.apache.ctakes.typesystem.type.refsem.Medication;
-import org.apache.ctakes.typesystem.type.refsem.Procedure;
-import org.apache.ctakes.typesystem.type.refsem.SignSymptom;
+import org.apache.ctakes.typesystem.type.refsem.*;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
 import org.apache.ctakes.typesystem.type.relation.CoreferenceRelation;
-import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
-import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
-import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.Markable;
-import org.apache.ctakes.typesystem.type.textsem.MedicationMention;
-import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
-import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
+import org.apache.ctakes.typesystem.type.textsem.*;
 import org.apache.ctakes.typesystem.type.textspan.Segment;
 import org.apache.ctakes.utils.struct.CounterMap;
+import org.apache.log4j.Logger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -65,16 +30,23 @@ import org.apache.uima.jcas.cas.EmptyFSL
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.cas.NonEmptyFSList;
 import org.apache.uima.resource.ResourceInitializationException;
-import org.cleartk.ml.CleartkAnnotator;
-import org.cleartk.ml.CleartkProcessingException;
-import org.cleartk.ml.DataWriter;
-import org.cleartk.ml.Feature;
-import org.cleartk.ml.Instance;
+import org.cleartk.ml.*;
 import org.cleartk.ml.feature.extractor.FeatureExtractor1;
 import org.cleartk.ml.jar.DefaultDataWriterFactory;
 import org.cleartk.ml.jar.DirectoryDataWriterFactory;
 import org.cleartk.ml.jar.GenericJarClassifierFactory;
-import org.cleartk.util.ViewUriUtil;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+
+
 @PipeBitInfo(
 	      name = "Coreference (Clusters)",
 	      description = "Coreference annotator using mention-synchronous paradigm.",
@@ -82,6 +54,8 @@ import org.cleartk.util.ViewUriUtil;
    	      products = { COREFERENCE_RELATION }
 	)
 public class MentionClusterCoreferenceAnnotator extends CleartkAnnotator<String> {
+  static private final Logger LOGGER = Logger.getLogger( MentionClusterCoreferenceAnnotator.class.getSimpleName() );
+
   public static final String NO_RELATION_CATEGORY = "-NONE-";
   public static final String PARAM_PROBABILITY_OF_KEEPING_A_NEGATIVE_EXAMPLE =
       "ProbabilityOfKeepingANegativeExample";
@@ -96,7 +70,15 @@ public class MentionClusterCoreferenceAn
       mandatory=false,
       description = "Whether to use encoders in output directory during data writing; if we are making multiple calls")
   private boolean useExistingEncoders=false;
-      
+
+  public static final String PARAM_SINGLE_DOCUMENT = "SingleDocument";
+  @ConfigurationParameter(
+        name = PARAM_SINGLE_DOCUMENT,
+        mandatory = false,
+        description = "Specify that coreferences should be sought for a single document.",
+        defaultValue = "true" )
+  private boolean singleDocument;
+
   protected Random coin = new Random(0);
 
   boolean greedyFirst = true;
@@ -218,154 +200,157 @@ public class MentionClusterCoreferenceAn
   }
   
   @Override
-  public void process(JCas docCas) throws AnalysisEngineProcessException {
-    
+  public void process( final JCas jCas ) throws AnalysisEngineProcessException {
+
     //this.dataWriter.write(new Instance<String>("#DEBUG " + ViewUriUtil.getURI(docCas)));
-    
+    LOGGER.info( "Finding Coreferences ..." );
+
+    if ( singleDocument ) {
+      processDocument( jCas );
+      LOGGER.info( "Finished." );
+      return;
+    }
+
     int numDocs;
     try {
-      numDocs = Integer.valueOf(docCas.getView(PatientViewsUtil.NUM_DOCS_NAME).getDocumentText());
-    } catch (NumberFormatException | CASException e) {
+      numDocs = Integer.valueOf( jCas.getView( PatientViewsUtil.NUM_DOCS_NAME ).getDocumentText() );
+    } catch ( NumberFormatException | CASException e ) {
+      // TODO remove stack trace when ready
       e.printStackTrace();
-      throw new AnalysisEngineProcessException(e);
+      throw new AnalysisEngineProcessException( e );
     }
-    
-    for(int docNum = 0; docNum < numDocs; docNum++){
-      JCas jCas;
-      try {
-        jCas = docCas.getView(PatientViewsUtil.getViewName(docNum));
-      } catch (CASException e) {
-        e.printStackTrace();
-        throw new AnalysisEngineProcessException(e);
+    try ( DotLogger dotter = new DotLogger() ) {
+      for ( int docNum = 0; docNum < numDocs; docNum++ ) {
+        JCas docCas;
+        try {
+          docCas = jCas.getView( PatientViewsUtil.getViewName( docNum ) );
+        } catch ( CASException casE ) {
+          // TODO remove stack trace when ready
+          casE.printStackTrace();
+          throw new AnalysisEngineProcessException( casE );
+        }
+        processDocument( docCas );
       }
-      // lookup from pair of annotations to binary text relation
-      // note: assumes that there will be at most one relation per pair
-      this.resetPairers(jCas);
-      Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation> relationLookup;
+    } catch ( IOException ioE ) {
+      LOGGER.error( ioE.getMessage() );
+    }
+    LOGGER.info( "Finished." );
+  }
+
+  private void processDocument( final JCas jCas ) throws AnalysisEngineProcessException {
+    // lookup from pair of annotations to binary text relation
+    // note: assumes that there will be at most one relation per pair
+    this.resetPairers( jCas );
+    Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation>
+          relationLookup;
+    if ( this.isTraining() ) {
+      relationLookup = ClusterMentionFetcher.getPairRelations( jCas );
+    } else {
       relationLookup = new HashMap<>();
-      if (this.isTraining()) {
-        for (CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)) {
-          for(IdentifiedAnnotation mention : JCasUtil.select(cluster.getMembers(), Markable.class)){
-            CollectionTextRelationIdentifiedAnnotationRelation relation = 
-                new CollectionTextRelationIdentifiedAnnotationRelation(jCas);
-            relation.setCluster(cluster);
-            relation.setMention(mention);
-            relation.setCategory("CoreferenceClusterMember");
-            relation.addToIndexes();
-            // The key is a list of args so we can do bi-directional lookup
-            CollectionTextRelationIdentifiedAnnotationPair key = new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention);
-            if(relationLookup.containsKey(key)){
-              String cat = relationLookup.get(key).getCategory();
-              System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
-              System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + cat + " at this span: " + mention.getCoveredText());
+    }
+
+
+    for ( Segment segment : JCasUtil.select( jCas, Segment.class ) ) {
+      for ( Markable mention : JCasUtil.selectCovered( jCas, Markable.class, segment ) ) {
+        //        ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
+        boolean singleton = true;
+        double maxScore = 0.0;
+        CollectionTextRelation maxCluster = null;
+
+        for ( CollectionTextRelationIdentifiedAnnotationPair pair : this.getCandidateRelationArgumentPairs( jCas, mention ) ) {
+          CollectionTextRelation cluster = pair.getCluster();
+          // apply all the feature extractors to extract the list of features
+          List<Feature> features = new ArrayList<>();
+          for ( RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> extractor : this.relationExtractors ) {
+            List<Feature> feats = extractor.extract( jCas, cluster, mention );
+            if ( feats != null ) {
+              //              Logger.getRootLogger().info(String.format("For cluster with %d mentions, %d %s features", JCasUtil.select(cluster.getMembers(), Markable.class).size(), feats.size(), extractor.getClass().getSimpleName()));
+              features.addAll( feats );
             }
-            relationLookup.put(key, relation);
           }
-        }
-      }
 
+          for ( FeatureExtractor1<Markable> extractor : this.mentionExtractors ) {
+            features.addAll( extractor.extract( jCas, mention ) );
+          }
 
-      for(Segment segment : JCasUtil.select(jCas, Segment.class)){
-        for(Markable mention : JCasUtil.selectCovered(jCas, Markable.class, segment)){
-          //        ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
-          boolean singleton = true;
-          double maxScore = 0.0;
-          CollectionTextRelation maxCluster = null;
-
-          for(CollectionTextRelationIdentifiedAnnotationPair pair : this.getCandidateRelationArgumentPairs(jCas, mention)){
-            CollectionTextRelation cluster = pair.getCluster();
-            // apply all the feature extractors to extract the list of features
-            List<Feature> features = new ArrayList<>();
-            for (RelationFeaturesExtractor<CollectionTextRelation,IdentifiedAnnotation> extractor : this.relationExtractors) {
-              List<Feature> feats = extractor.extract(jCas, cluster, mention);
-              if (feats != null){
-                //              Logger.getRootLogger().info(String.format("For cluster with %d mentions, %d %s features", JCasUtil.select(cluster.getMembers(), Markable.class).size(), feats.size(), extractor.getClass().getSimpleName()));
-                features.addAll(feats);
-              }
+          // here is where feature conjunctions can go (dupFeatures)
+          List<Feature> dupFeatures = new ArrayList<>();
+          // sanity check on feature values
+          for ( Feature feature : features ) {
+            if ( feature.getValue() == null ) {
+              feature.setValue( "NULL" );
+              String message = String.format( "Null value found in %s from %s", feature, features );
+              System.err.println( message );
             }
+          }
 
-            for(FeatureExtractor1<Markable> extractor : this.mentionExtractors){
-              features.addAll(extractor.extract(jCas, mention));
-            }
+          features.addAll( dupFeatures );
 
-            // here is where feature conjunctions can go (dupFeatures)
-            List<Feature> dupFeatures = new ArrayList<>();
-            // sanity check on feature values
-            for (Feature feature : features) {
-              if (feature.getValue() == null) {
-                feature.setValue("NULL");
-                String message = String.format("Null value found in %s from %s", feature, features);
-                System.err.println(message);
-              }            
+          // during training, feed the features to the data writer
+          if ( this.isTraining() ) {
+            String category = this.getRelationCategory( relationLookup, cluster, mention );
+            if ( category == null ) {
+              continue;
             }
 
-            features.addAll(dupFeatures);
-
-            // during training, feed the features to the data writer
-            if (this.isTraining()) {
-              String category = this.getRelationCategory(relationLookup, cluster, mention);
-              if (category == null) {
-                continue;
-              }
+            // create a classification instance and write it to the training data
+            this.dataWriter.write( new Instance<>( category, features ) );
+            if ( !category.equals( NO_RELATION_CATEGORY ) ) {
+              singleton = false;
+              break;
+            }
+          }
 
-              // create a classification instance and write it to the training data
-              this.dataWriter.write(new Instance<>(category, features));
-              if(!category.equals(NO_RELATION_CATEGORY)){
+          // during classification feed the features to the classifier and create
+          // annotations
+          else {
+            String predictedCategory = this.classify( features );
+            // TODO look at scores in classifier and try best-pair rather than first-pair?
+            Map<String, Double> scores = this.classifier.score( features );
+
+            // add a relation annotation if a true relation was predicted
+            if ( !predictedCategory.equals( NO_RELATION_CATEGORY ) ) {
+              //              Logger.getLogger("MCAnnotator").info(String.format("Making a pair with score %f", scores.get(predictedCategory)));
+              if ( greedyFirst ) {
+                createRelation( jCas, cluster, mention, predictedCategory, scores.get( predictedCategory ) );
                 singleton = false;
+                // break here for "closest-first" greedy decoding strategy (Soon et al., 2001), terminology from Lasalle and Denis (2013),
+                // for "best first" need to keep track of all relations with scores and only keep the highest
                 break;
               }
-            }
-
-            // during classification feed the features to the classifier and create
-            // annotations
-            else {
-              String predictedCategory = this.classify(features);
-              // TODO look at scores in classifier and try best-pair rather than first-pair?
-              Map<String,Double> scores = this.classifier.score(features);
-
-              // add a relation annotation if a true relation was predicted
-              if (!predictedCategory.equals(NO_RELATION_CATEGORY)) {
-                //              Logger.getLogger("MCAnnotator").info(String.format("Making a pair with score %f", scores.get(predictedCategory)));
-                if(greedyFirst){
-                  createRelation(jCas, cluster, mention, predictedCategory, scores.get(predictedCategory));
-                  singleton = false;
-                  // break here for "closest-first" greedy decoding strategy (Soon et al., 2001), terminology from Lasalle and Denis (2013),
-                  // for "best first" need to keep track of all relations with scores and only keep the highest
-                  break;
-                }
-                if(scores.get(predictedCategory) > maxScore){
-                  maxScore = scores.get(predictedCategory);
-                  maxCluster = cluster;
-                }
+              if ( scores.get( predictedCategory ) > maxScore ) {
+                maxScore = scores.get( predictedCategory );
+                maxCluster = cluster;
               }
             }
           }
-          if(!this.isTraining() && !greedyFirst && maxCluster != null){
-            // make a link with the max cluster
-            createRelation(jCas, maxCluster, mention, "CoreferenceClusterMember", maxScore);
-          }
+        }
+        if ( !this.isTraining() && !greedyFirst && maxCluster != null ) {
+          // make a link with the max cluster
+          createRelation( jCas, maxCluster, mention, "CoreferenceClusterMember", maxScore );
+        }
 
-          // if we got this far and never matched up the markable then add it to list.
-          // do this even during training -- adds non-chain markables to antecedent list which will be seen during testing.
-          if(singleton){
-            // make the markable it's own cluster:
-            CollectionTextRelation chain = new CollectionTextRelation(jCas);
-            chain.setCategory("Identity");
-            NonEmptyFSList list = new NonEmptyFSList(jCas);
-            list.setHead(mention);
-            list.setTail(new EmptyFSList(jCas));
-            chain.setMembers(list);
-            chain.addToIndexes();
-            list.addToIndexes();
-            list.getTail().addToIndexes();
-          }
+        // if we got this far and never matched up the markable then add it to list.
+        // do this even during training -- adds non-chain markables to antecedent list which will be seen during testing.
+        if ( singleton ) {
+          // make the markable its own cluster:
+          CollectionTextRelation chain = new CollectionTextRelation( jCas );
+          chain.setCategory( "Identity" );
+          NonEmptyFSList list = new NonEmptyFSList( jCas );
+          list.setHead( mention );
+          list.setTail( new EmptyFSList( jCas ) );
+          chain.setMembers( list );
+          chain.addToIndexes();
+          list.addToIndexes();
+          list.getTail().addToIndexes();
         }
       }
+    }
 
-      removeSingletonClusters(jCas);
+    removeSingletonClusters( jCas );
+
+    createEventClusters( jCas );
 
-      createEventClusters(jCas);
-    }
   }
   
  
@@ -413,10 +398,10 @@ public class MentionClusterCoreferenceAn
    * 
    * @param jCas
    *          - JCas object, needed to create new UIMA types
-   * @param arg1
-   *          - First argument to relation
-   * @param arg2
-   *          - Second argument to relation
+  //   * @param arg1
+  //   *          - First argument to relation
+  //   * @param arg2
+  //   *          - Second argument to relation
    * @param predictedCategory
    *          - Name of relation
    */
@@ -570,35 +555,35 @@ public class MentionClusterCoreferenceAn
     }
     return scoreMap;
   }
-  
-  public static class CollectionTextRelationIdentifiedAnnotationPair {
-    private final CollectionTextRelation cluster;
-    private final IdentifiedAnnotation mention;
-    
-    public CollectionTextRelationIdentifiedAnnotationPair(CollectionTextRelation cluster, IdentifiedAnnotation mention){
-      this.cluster = cluster;
-      this.mention = mention;
-    }
-    
-    public final CollectionTextRelation getCluster(){
-      return this.cluster;
-    }
-    
-    public final IdentifiedAnnotation getMention(){
-      return this.mention;
-    }
-    
-    @Override
-    public boolean equals(Object obj) {
-      CollectionTextRelationIdentifiedAnnotationPair other = (CollectionTextRelationIdentifiedAnnotationPair) obj;
-      return (this.cluster == other.cluster &&
-          this.mention == other.mention);
-    }
-    
-    @Override
-    public int hashCode() {
-      return 31*cluster.hashCode() + (mention==null ? 0 : mention.hashCode());
-    }
-  }
+
+//  public static class CollectionTextRelationIdentifiedAnnotationPair {
+//    private final CollectionTextRelation cluster;
+//    private final IdentifiedAnnotation mention;
+//
+//    public CollectionTextRelationIdentifiedAnnotationPair(CollectionTextRelation cluster, IdentifiedAnnotation mention){
+//      this.cluster = cluster;
+//      this.mention = mention;
+//    }
+//
+//    public final CollectionTextRelation getCluster(){
+//      return this.cluster;
+//    }
+//
+//    public final IdentifiedAnnotation getMention(){
+//      return this.mention;
+//    }
+//
+//    @Override
+//    public boolean equals(Object obj) {
+//      CollectionTextRelationIdentifiedAnnotationPair other = (CollectionTextRelationIdentifiedAnnotationPair) obj;
+//      return (this.cluster == other.cluster &&
+//          this.mention == other.mention);
+//    }
+//
+//    @Override
+//    public int hashCode() {
+//      return 31*cluster.hashCode() + (mention==null ? 0 : mention.hashCode());
+//    }
+//  }
 
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java Wed Sep  6 21:44:12 2017
@@ -1,30 +1,9 @@
 package org.apache.ctakes.coreference.ae;
 
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-import java.util.Set;
-
 import org.apache.ctakes.core.pipeline.PipeBitInfo;
 import org.apache.ctakes.core.util.ListFactory;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAgreementFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterAttributeFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDepHeadExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterDistSemExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterMentionFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSalienceFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSectionFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterSemTypeDepPrefsFeatureExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterStackFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterStringFeaturesExtractor;
-import org.apache.ctakes.coreference.ae.features.cluster.MentionClusterUMLSFeatureExtractor;
+import org.apache.ctakes.coreference.ae.features.cluster.*;
+import org.apache.ctakes.coreference.util.ClusterMentionFetcher;
 import org.apache.ctakes.coreference.util.ClusterUtils;
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
@@ -61,11 +40,15 @@ import org.cleartk.ml.jar.DefaultDataWri
 import org.cleartk.ml.jar.DirectoryDataWriterFactory;
 import org.cleartk.ml.jar.GenericJarClassifierFactory;
 import org.cleartk.ml.svmlight.rank.QidInstance;
-import org.cleartk.util.ViewUriUtil;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
 
 import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.*;
-import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.COREFERENCE_RELATION;
-import static org.apache.ctakes.core.pipeline.PipeBitInfo.TypeProduct.MARKABLE;
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+// TODO Consolidate all of the duplicate code in the coref module
 
 @PipeBitInfo(
       name = "Coreference (Cluster Rank)",
@@ -343,18 +326,9 @@ public class MentionClusterRankingCorefe
     }
     String head = headNode.getCoveredText().toLowerCase();
     if(headWordMarkables.containsKey(head)){
-      Set<Markable> headSet = headWordMarkables.get(head);
-      for(CollectionTextRelation cluster : JCasUtil.select(jcas, CollectionTextRelation.class)){
-        Annotation mostRecent = ClusterUtils.getMostRecent((NonEmptyFSList)cluster.getMembers(), mention);
-        if(mostRecent == null) continue;
-        for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){
-          if(headSet.contains(mostRecent)){
-            pairs.add(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention));
-            break;
-          }
-          if(m == mostRecent) break;
-        }
-      }      
+      final Set<Markable> headSet = headWordMarkables.get( head );
+
+      ClusterMentionFetcher.populatePairs( jcas, mention, headSet, pairs );
     }
     
     return pairs;
@@ -370,31 +344,17 @@ public class MentionClusterRankingCorefe
 //    pairScores = getMarkablePairScores(jCas);
     
     Map<CollectionTextRelationIdentifiedAnnotationPair, CollectionTextRelationIdentifiedAnnotationRelation> relationLookup;
-    relationLookup = new HashMap<>();
     if (this.isTraining()) {
-      for (CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)) {
-        for(IdentifiedAnnotation mention : JCasUtil.select(cluster.getMembers(), Markable.class)){
-          CollectionTextRelationIdentifiedAnnotationRelation relation = 
-              new CollectionTextRelationIdentifiedAnnotationRelation(jCas);
-          relation.setCluster(cluster);
-          relation.setMention(mention);
-          relation.setCategory("CoreferenceClusterMember");
-          relation.addToIndexes();
-          // The key is a list of args so we can do bi-directional lookup
-          CollectionTextRelationIdentifiedAnnotationPair key = new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention);
-          if(relationLookup.containsKey(key)){
-            String cat = relationLookup.get(key).getCategory();
-            System.err.println("Error in: "+ ViewUriUtil.getURI(jCas).toString());
-            System.err.println("Error! This attempted relation " + relation.getCategory() + " already has a relation " + cat + " at this span: " + mention.getCoveredText());
-          }
-          relationLookup.put(key, relation);
-        }
-      }
+      relationLookup = ClusterMentionFetcher.getPairRelations( jCas );
+    } else {
+      relationLookup = new HashMap<>();
     }
+    final Map<Segment, Collection<Markable>> segmentMarkables = JCasUtil.indexCovered( jCas, Segment.class, Markable.class );
 
-    
-    for(Segment segment : JCasUtil.select(jCas, Segment.class)){
-      for(Markable mention : JCasUtil.selectCovered(jCas, Markable.class, segment)){
+//    for(Segment segment : JCasUtil.select(jCas, Segment.class)){
+//      for(Markable mention : JCasUtil.selectCovered(jCas, Markable.class, segment)){
+    for ( Collection<Markable> markables : segmentMarkables.values() ) {
+      for ( Markable mention : markables ) {
         ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention);
         String mentionText = mention.getCoveredText().toLowerCase();
         boolean singleton = true;
@@ -573,10 +533,10 @@ public class MentionClusterRankingCorefe
    * 
    * @param jCas
    *          - JCas object, needed to create new UIMA types
-   * @param arg1
-   *          - First argument to relation
-   * @param arg2
-   *          - Second argument to relation
+  //   * @param arg1
+  //   *          - First argument to relation
+  //   * @param arg2
+  //   *          - Second argument to relation
    * @param predictedCategory
    *          - Name of relation
    */
@@ -665,35 +625,35 @@ public class MentionClusterRankingCorefe
     }
     return scoreMap;
   }
-  
-  public static class CollectionTextRelationIdentifiedAnnotationPair {
-    private final CollectionTextRelation cluster;
-    private final IdentifiedAnnotation mention;
-    
-    public CollectionTextRelationIdentifiedAnnotationPair(CollectionTextRelation cluster, IdentifiedAnnotation mention){
-      this.cluster = cluster;
-      this.mention = mention;
-    }
-    
-    public final CollectionTextRelation getCluster(){
-      return this.cluster;
-    }
-    
-    public final IdentifiedAnnotation getMention(){
-      return this.mention;
-    }
-    
-    @Override
-    public boolean equals(Object obj) {
-      CollectionTextRelationIdentifiedAnnotationPair other = (CollectionTextRelationIdentifiedAnnotationPair) obj;
-      return (this.cluster == other.cluster &&
-          this.mention == other.mention);
-    }
-    
-    @Override
-    public int hashCode() {
-      return 31*cluster.hashCode() + (mention==null ? 0 : mention.hashCode());
-    }
-  }
+
+//  public static class CollectionTextRelationIdentifiedAnnotationPair {
+//    private final CollectionTextRelation cluster;
+//    private final IdentifiedAnnotation mention;
+//
+//    public CollectionTextRelationIdentifiedAnnotationPair(CollectionTextRelation cluster, IdentifiedAnnotation mention){
+//      this.cluster = cluster;
+//      this.mention = mention;
+//    }
+//
+//    public final CollectionTextRelation getCluster(){
+//      return this.cluster;
+//    }
+//
+//    public final IdentifiedAnnotation getMention(){
+//      return this.mention;
+//    }
+//
+//    @Override
+//    public boolean equals(Object obj) {
+//      CollectionTextRelationIdentifiedAnnotationPair other = (CollectionTextRelationIdentifiedAnnotationPair) obj;
+//      return (this.cluster == other.cluster &&
+//          this.mention == other.mention);
+//    }
+//
+//    @Override
+//    public int hashCode() {
+//      return 31*cluster.hashCode() + (mention==null ? 0 : mention.hashCode());
+//    }
+//  }
 
 }

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterMentionPairer_ImplBase.java Wed Sep  6 21:44:12 2017
@@ -1,6 +1,6 @@
 package org.apache.ctakes.coreference.ae.pairing.cluster;
 
-import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
+//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 import org.apache.ctakes.coreference.ae.pairing.AnnotationPairer;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
@@ -14,6 +14,7 @@ import org.apache.uima.jcas.tcas.Annotat
 import java.util.*;
 
 import static org.apache.ctakes.coreference.ae.MarkableHeadTreeCreator.getKey;
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
 
 //import org.apache.ctakes.dependency.parser.util.DependencyUtility;
 

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterPairer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterPairer.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterPairer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ClusterPairer.java Wed Sep  6 21:44:12 2017
@@ -1,10 +1,6 @@
 package org.apache.ctakes.coreference.ae.pairing.cluster;
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.ctakes.coreference.ae.EventCoreferenceAnnotator;
-import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 import org.apache.ctakes.coreference.util.ClusterUtils;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -14,6 +10,13 @@ import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.NonEmptyFSList;
 import org.apache.uima.jcas.tcas.Annotation;
 
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
+
 public class ClusterPairer extends ClusterMentionPairer_ImplBase {
   private int sentDist;
   public ClusterPairer(int dist){

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ExactStringPairer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ExactStringPairer.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ExactStringPairer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/ExactStringPairer.java Wed Sep  6 21:44:12 2017
@@ -1,11 +1,5 @@
 package org.apache.ctakes.coreference.ae.pairing.cluster;
 
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 import org.apache.ctakes.coreference.util.ClusterUtils;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
@@ -14,6 +8,15 @@ import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.NonEmptyFSList;
 import org.apache.uima.jcas.tcas.Annotation;
 
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
+
 public class ExactStringPairer extends ClusterMentionPairer_ImplBase {
 
   private Set<String> markableStrings = null;

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/HeadwordPairer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/HeadwordPairer.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/HeadwordPairer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/HeadwordPairer.java Wed Sep  6 21:44:12 2017
@@ -1,24 +1,18 @@
 package org.apache.ctakes.coreference.ae.pairing.cluster;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
 import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator;
-import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
-import org.apache.ctakes.coreference.util.ClusterUtils;
+import org.apache.ctakes.coreference.util.ClusterMentionFetcher;
 import org.apache.ctakes.dependency.parser.util.DependencyUtility;
-import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
 import org.apache.ctakes.typesystem.type.textsem.Markable;
 import org.apache.log4j.Logger;
-import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.NonEmptyFSList;
-import org.apache.uima.jcas.tcas.Annotation;
+
+import java.util.*;
+
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 
 public class HeadwordPairer extends ClusterMentionPairer_ImplBase {
   private Map<String, Set<Markable>> headWordMarkables = null;
@@ -40,19 +34,9 @@ public class HeadwordPairer extends Clus
     }
     String head = headNode.getCoveredText().toLowerCase();
     if(headWordMarkables.containsKey(head)){
-      Set<Markable> headSet = headWordMarkables.get(head);
-      for(CollectionTextRelation cluster : JCasUtil.select(jcas, CollectionTextRelation.class)){
-        Annotation mostRecent = ClusterUtils.getMostRecent((NonEmptyFSList)cluster.getMembers(), mention);
-        if(mostRecent == null) continue;
-        for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){
-          if(headSet.contains(mostRecent)){
-            pairs.add(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention));
-            break;
-          }
-          if(m == mostRecent) break;
-        }
-      }      
-    }else{    
+       final Set<Markable> headSet = headWordMarkables.get( head );
+       ClusterMentionFetcher.populatePairs( jcas, mention, headSet, pairs );
+    } else {
       headWordMarkables.put(head, new HashSet<Markable>());
     }
     headWordMarkables.get(head).add(mention);

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SectionHeaderPairer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SectionHeaderPairer.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SectionHeaderPairer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SectionHeaderPairer.java Wed Sep  6 21:44:12 2017
@@ -1,10 +1,6 @@
 package org.apache.ctakes.coreference.ae.pairing.cluster;
 
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.ctakes.coreference.ae.EventCoreferenceAnnotator;
-import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 import org.apache.ctakes.coreference.util.ClusterUtils;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
@@ -16,6 +12,14 @@ import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.NonEmptyFSList;
 import org.apache.uima.jcas.tcas.Annotation;
 
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
+
+
 public class SectionHeaderPairer extends ClusterMentionPairer_ImplBase {
 
   private int sentDist;

Modified: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SentenceDistancePairer.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SentenceDistancePairer.java?rev=1807533&r1=1807532&r2=1807533&view=diff
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SentenceDistancePairer.java (original)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/ae/pairing/cluster/SentenceDistancePairer.java Wed Sep  6 21:44:12 2017
@@ -1,11 +1,6 @@
 package org.apache.ctakes.coreference.ae.pairing.cluster;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
 import org.apache.ctakes.coreference.ae.EventCoreferenceAnnotator;
-import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
 import org.apache.ctakes.coreference.util.ClusterUtils;
 import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
 import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
@@ -17,6 +12,14 @@ import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.NonEmptyFSList;
 import org.apache.uima.jcas.tcas.Annotation;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.ctakes.coreference.util.ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair;
+
+//import org.apache.ctakes.coreference.ae.MentionClusterCoreferenceAnnotator.CollectionTextRelationIdentifiedAnnotationPair;
+
 public class SentenceDistancePairer extends ClusterMentionPairer_ImplBase {
 
   private int sentDistance;
@@ -31,7 +34,7 @@ public class SentenceDistancePairer exte
    * the mention and the latest element of the cluster.
    */
   @Override
-  public List<CollectionTextRelationIdentifiedAnnotationPair> getPairs(JCas jcas, Markable mention){
+  public List<CollectionTextRelationIdentifiedAnnotationPair> getPairs( JCas jcas, Markable mention ) {
     List<CollectionTextRelationIdentifiedAnnotationPair> pairs = new ArrayList<>();
     Set<String> bestAnaTypes = getBestEnt(jcas, (Markable) mention);
     

Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java?rev=1807533&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java (added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterMentionFetcher.java Wed Sep  6 21:44:12 2017
@@ -0,0 +1,119 @@
+package org.apache.ctakes.coreference.util;
+
+
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelationIdentifiedAnnotationRelation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.log4j.Logger;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSList;
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.util.ViewUriUtil;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+
+/**
+ * @author SPF , chip-nlp
+ * @version %I%
+ * @since 9/6/2017
+ */
+final public class ClusterMentionFetcher {
+
+   static private final Logger LOGGER = Logger.getLogger( "ClusterMentionFetcher" );
+
+   private ClusterMentionFetcher() {
+   }
+
+   static public Map<CollectionTextRelationIdentifiedAnnotationPair,
+         CollectionTextRelationIdentifiedAnnotationRelation> getPairRelations( final JCas jCas )
+         throws AnalysisEngineProcessException {
+
+      final Map<CollectionTextRelationIdentifiedAnnotationPair,
+            CollectionTextRelationIdentifiedAnnotationRelation> relationLookup = new HashMap<>();
+
+      for ( CollectionTextRelation cluster : JCasUtil.select( jCas, CollectionTextRelation.class ) ) {
+         for ( IdentifiedAnnotation mention : JCasUtil.select( cluster.getMembers(), Markable.class ) ) {
+            final CollectionTextRelationIdentifiedAnnotationRelation relation =
+                  new CollectionTextRelationIdentifiedAnnotationRelation( jCas );
+            relation.setCluster( cluster );
+            relation.setMention( mention );
+            relation.setCategory( "CoreferenceClusterMember" );
+            relation.addToIndexes();
+            // The key is a list of args so we can do bi-directional lookup
+            final CollectionTextRelationIdentifiedAnnotationPair key = new CollectionTextRelationIdentifiedAnnotationPair( cluster, mention );
+            if ( relationLookup.containsKey( key ) ) {
+               String category = relationLookup.get( key ).getCategory();
+               System.err.println( "Error in: " + ViewUriUtil.getURI( jCas ).toString() );
+               System.err.println( "Error! This attempted relation " + relation.getCategory() + " already has a relation " + category + " at this span: " + mention.getCoveredText() );
+            }
+            relationLookup.put( key, relation );
+         }
+      }
+      return relationLookup;
+   }
+
+   static public void populatePairs( final JCas jCas,
+                                     final IdentifiedAnnotation mention,
+                                     final Collection<Markable> headSet,
+                                     final Collection<CollectionTextRelationIdentifiedAnnotationPair> pairs ) {
+      for ( CollectionTextRelation cluster : JCasUtil.select( jCas, CollectionTextRelation.class ) ) {
+         final FSList members = cluster.getMembers();
+         final Annotation mostRecent = ClusterUtils.getMostRecent( (NonEmptyFSList) members, mention );
+         if ( mostRecent == null ) {
+            continue;
+         }
+         for ( Markable m : JCasUtil.select( members, Markable.class ) ) {
+            if ( headSet.contains( mostRecent ) ) {
+               pairs.add( new CollectionTextRelationIdentifiedAnnotationPair( cluster, mention ) );
+               break;
+            }
+            if ( m == mostRecent ) {
+               break;
+            }
+         }
+      }
+   }
+
+
+   public static class CollectionTextRelationIdentifiedAnnotationPair {
+      private final CollectionTextRelation cluster;
+      private final IdentifiedAnnotation mention;
+
+      public CollectionTextRelationIdentifiedAnnotationPair( final CollectionTextRelation cluster, final IdentifiedAnnotation mention ) {
+         this.cluster = cluster;
+         this.mention = mention;
+      }
+
+      public final CollectionTextRelation getCluster() {
+         return this.cluster;
+      }
+
+      public final IdentifiedAnnotation getMention() {
+         return this.mention;
+      }
+
+      @Override
+      public boolean equals( final Object object ) {
+         if ( !CollectionTextRelationIdentifiedAnnotationPair.class.isInstance( object ) ) {
+            return false;
+         }
+         final CollectionTextRelationIdentifiedAnnotationPair other
+               = (CollectionTextRelationIdentifiedAnnotationPair) object;
+         return this.cluster == other.cluster && this.mention == other.mention;
+      }
+
+      @Override
+      public int hashCode() {
+         return 31 * cluster.hashCode() + (mention == null ? 0 : mention.hashCode());
+      }
+   }
+
+
+}



Mime
View raw message