ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1720836 - in /ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae: ./ features/ features/cluster/
Date Fri, 18 Dec 2015 18:22:23 GMT
Author: tmill
Date: Fri Dec 18 18:22:23 2015
New Revision: 1720836

URL: http://svn.apache.org/viewvc?rev=1720836&view=rev
Log:
Minor changes to several features.

Modified:
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDistSemExtractor.java
    ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java?rev=1720836&r1=1720835&r2=1720836&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/MentionClusterRankingCoreferenceAnnotator.java
Fri Dec 18 18:22:23 2015
@@ -478,6 +478,11 @@ public class MentionClusterRankingCorefe
           // write a dummy link with only mention features:
           QidInstance<Double> inst = new QidInstance<>();
           inst.setQid(String.valueOf(qid));
+          for(Feature feat : mentionFeatures){
+            if(feat.getName() != null){
+              feat.setName("DUMMYLINK_" + feat.getName());
+            }
+          }
           inst.addAll(mentionFeatures);
           if(singleton){
             inst.setOutcome(1.0);

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java?rev=1720836&r1=1720835&r2=1720836&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/StringMatchingFeatureExtractor.java
Fri Dec 18 18:22:23 2015
@@ -87,8 +87,12 @@ public class StringMatchingFeatureExtrac
 	}
 
 	public static boolean wordSubstring(Set<String> t1, Set<String> t2){
-		// TODO
-		return false;
+	  for(String s1 : t1){
+	    for(String s2 : t2){
+	      if(s1.contains(s2) || s2.contains(s1)) return true;
+	    }
+	  }
+	  return false;
 	}
 	
 	public static Set<String> contentWords(Annotation a1){

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDistSemExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDistSemExtractor.java?rev=1720836&r1=1720835&r2=1720836&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDistSemExtractor.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDistSemExtractor.java
Fri Dec 18 18:22:23 2015
@@ -28,7 +28,11 @@ public class MentionClusterDistSemExtrac
   private WordEmbeddings words = null;
   
   public MentionClusterDistSemExtractor() throws FileNotFoundException, IOException{
-    words = WordVectorReader.getEmbeddings(FileLocator.getAsStream("org/apache/ctakes/coreference/distsem/mimic_vectors.txt"));
+    this("org/apache/ctakes/coreference/distsem/mimic_vectors.txt");
+  }
+  
+  public MentionClusterDistSemExtractor(String embeddingsPath) throws FileNotFoundException, IOException{
+    words = WordVectorReader.getEmbeddings(FileLocator.getAsStream(embeddingsPath));
   }
 
   @Override
@@ -53,7 +57,7 @@ public class MentionClusterDistSemExtrac
         ConllDependencyNode memberNode = DependencyUtility.getNominalHeadNode(jCas, member);
         String memberHead = memberNode != null ? memberNode.getCoveredText().toLowerCase() : null;
         if(mentionHead.equals(memberHead)){
-          maxSim = 1.0;
+          maxSim = 0.0;
           break;
         }
         if(memberNode != null && words.containsKey(memberHead) && words.containsKey(mentionHead)){

Modified: ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java?rev=1720836&r1=1720835&r2=1720836&view=diff
==============================================================================
--- ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
(original)
+++ ctakes/sandbox/ctakes-coref-cleartk/src/main/java/org/apache/ctakes/coreference/ae/features/cluster/MentionClusterMentionFeaturesExtractor.java
Fri Dec 18 18:22:23 2015
@@ -63,7 +63,7 @@ public class MentionClusterMentionFeatur
     // token features:
     feats.addAll(tokenContext.extract(view, focusAnnotation));
     
-    feats.add(new Feature("NumCoveredTokens_" + JCasUtil.selectCovered(BaseToken.class, focusAnnotation).size()));
+    feats.add(new Feature("NumCoveredTokens", JCasUtil.selectCovered(BaseToken.class, focusAnnotation).size()));
     
     // pos features:
     feats.addAll(mentionFeaturesExtractor.extract(view, focusAnnotation));



Mime
View raw message