ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1687535 - /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/ChainStackFeatureExtractor.java
Date Thu, 25 Jun 2015 14:32:13 GMT
Author: tmill
Date: Thu Jun 25 14:32:13 2015
New Revision: 1687535

URL: http://svn.apache.org/r1687535
Log:
Chain stack feature for pairwise coref model.

Added:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/ChainStackFeatureExtractor.java

Added: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/ChainStackFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/ChainStackFeatureExtractor.java?rev=1687535&view=auto
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/ChainStackFeatureExtractor.java	(added)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/ChainStackFeatureExtractor.java	Thu Jun 25 14:32:13 2015
@@ -0,0 +1,68 @@
+package org.apache.ctakes.coreference.ae.features;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.ctakes.core.util.ListIterable;
+import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
+import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
+import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.Markable;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+
+/**
+ * Pairwise-coreference feature describing the chain the antecedent already belongs to:
+ * how many members of that chain end before the antecedent does.
+ */
+public class ChainStackFeatureExtractor implements RelationFeaturesExtractor<IdentifiedAnnotation,IdentifiedAnnotation> {
+  /**
+   * Emits {@code ChainStackAnteSize} (rounded natural log of the count) when at least one
+   * chain member ends before {@code ante}, otherwise {@code ChainStackAnteSingleton=true}.
+   *
+   * @param jCas CAS whose {@link CollectionTextRelation} chains are consulted
+   * @param ante candidate antecedent
+   * @param ana candidate anaphor; not consulted by this extractor
+   * @return a list holding exactly one feature
+   */
+  @Override
+  public List<Feature> extract(JCas jCas, IdentifiedAnnotation ante,
+      IdentifiedAnnotation ana) throws AnalysisEngineProcessException {
+    List<Feature> feats = new ArrayList<>();
+
+    // Index every markable by its chain; a markable in several chains keeps the last one seen.
+    Map<Markable,CollectionTextRelation> mark2chain = new HashMap<>();
+    for(CollectionTextRelation chain : JCasUtil.select(jCas, CollectionTextRelation.class)){
+      for(Markable m : new ListIterable<Markable>(chain.getMembers())){
+        mark2chain.put(m, chain);
+      }
+    }
+
+    // TODO - an "AntecedentStackDepth" feature (markables between ante and ana) is deferred:
+    // it probably doesn't matter until we have a mention-cluster model, since in a
+    // mention-pair model it would likely always give trivial answers.
+
+    // Count only the chain members that end before the antecedent. During training we have
+    // oracle knowledge of the whole chain, but at test time only the part built so far
+    // exists, so later members must not be counted.
+    int chainSize = 0;
+    if(mark2chain.containsKey(ante)){
+      for(Markable m : new ListIterable<Markable>(mark2chain.get(ante).getMembers())){
+        if(m.getEnd() >= ante.getEnd()){
+          break; // NOTE(review): assumes members are ordered by end offset - confirm
+        }
+        chainSize++;
+      }
+    }
+    if(chainSize > 0){
+      feats.add(new Feature("ChainStackAnteSize", Math.round(Math.log(chainSize))));
+    }else{
+      feats.add(new Feature("ChainStackAnteSingleton", true));
+    }
+    return feats;
+  }
+}



Mime
View raw message