ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1748736 [5/5] - in /ctakes/trunk/ctakes-coreference: ./ src/main/java/org/apache/ctakes/coreference/ae/ src/main/java/org/apache/ctakes/coreference/ae/features/ src/main/java/org/apache/ctakes/coreference/ae/features/cluster/ src/main/java...
Date Thu, 16 Jun 2016 14:51:51 GMT
Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousBag.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousBag.java?rev=1748736&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousBag.java
(added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousBag.java
Thu Jun 16 14:51:51 2016
@@ -0,0 +1,107 @@
+package org.apache.ctakes.coreference.extractors;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Bounds;
+import org.cleartk.ml.feature.extractor.CleartkExtractor.Context;
+import org.cleartk.ml.feature.extractor.CleartkExtractor;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.FeatureExtractor1;
+
+/**
+ * Context that converts the features produced by other contexts into an averaged
+ * continuous vector. For each wrapped context, every extracted feature value is looked
+ * up in a term-to-vector table read from a file; the vectors that are found are summed
+ * and divided by their count, and each dimension of the average becomes one feature.
+ */
+public class ContinuousBag implements Context {
+
+  private Context[] contexts;
+  private String name = null;
+  private Map<String, double[]> vectors = null;
+  // Vector length as read from the vector file; sizes the per-context accumulator.
+  private int dims;
+
+  /**
+   * @param vecFile whitespace-separated text file with one entry per line: a term
+   *        followed by its vector components
+   * @param contexts contexts whose extracted feature values are looked up in the table
+   * @throws FileNotFoundException if vecFile cannot be opened
+   */
+  public ContinuousBag(File vecFile, Context... contexts) throws FileNotFoundException {
+    this.contexts = contexts;
+    this.vectors = readVectorFile(vecFile);
+    this.name = Feature.createName("ContinuousBag");
+  }
+
+  /**
+   * Reads the term-to-vector table and records the vector length in {@code dims}.
+   *
+   * @param vecFile file to read
+   * @return map from term to its vector
+   * @throws FileNotFoundException if vecFile cannot be opened
+   */
+  private Map<String, double[]> readVectorFile(File vecFile) throws FileNotFoundException{
+    Map<String, double[]> vectorMap = new HashMap<>();
+    try(Scanner scanner = new Scanner(vecFile)){
+      while(scanner.hasNextLine()){
+        String[] termVec = scanner.nextLine().trim().split("\\s+");
+        // Two tokens: some files have a first line with the dimensions. Fewer than two:
+        // a blank or term-only line, which would otherwise register an empty vector
+        // and reset dims to 0.
+        if(termVec.length <= 2){
+          continue;
+        }
+        dims = termVec.length-1;
+        double[] vector = new double[dims];
+        for(int i = 0; i < dims; i++){
+          vector[i] = Double.parseDouble(termVec[i+1]);
+        }
+        vectorMap.put(termVec[0], vector);
+      }
+    }
+    return vectorMap;
+  }
+
+  @Override
+  public String getName() {
+    return this.name;
+  }
+
+  /**
+   * Runs every wrapped context and, for each one that yields at least one feature value
+   * with a known vector, emits {@code dims} features holding the per-dimension average
+   * of those vectors. Contexts with no known values contribute no features.
+   *
+   * @return the averaged-vector features of all wrapped contexts, in context order
+   * @throws CleartkExtractorException if a wrapped context fails
+   */
+  @Override
+  public <SEARCH_T extends Annotation> List<Feature> extract(JCas jCas, Annotation focusAnnotation, Bounds bounds,
+      Class<SEARCH_T> annotationClass, FeatureExtractor1<SEARCH_T> extractor) throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+    for (Context context : this.contexts) {
+      double[] contextVec = new double[dims];
+      int numComponents = 0;
+      for (Feature feature : context.extract(
+          jCas,
+          focusAnnotation,
+          bounds,
+          annotationClass,
+          extractor)) {
+        double[] featVec = lookupVector(feature.getValue());
+        if(featVec != null){
+          addToVector(contextVec, featVec);
+          numComponents++;
+        }
+      }
+      if(numComponents > 0){
+        for(int i = 0; i < dims; i++){
+          String featName = Feature.createName(this.name, context.getName(), String.valueOf(i));
+          feats.add(new Feature(featName, contextVec[i] / numComponents));
+        }
+      }
+    }
+    return feats;
+  }
+
+  /**
+   * Finds the vector for a feature value, trying the value's text as-is and then
+   * lowercased. The presence check and the fetch use the same key, so a hit is never null.
+   *
+   * @param value feature value; may be null
+   * @return the matching vector, or null when the value is null or has no entry
+   */
+  private double[] lookupVector(Object value){
+    if(value == null){
+      return null;
+    }
+    String term = value.toString();
+    double[] vec = this.vectors.get(term);
+    if(vec == null){
+      vec = this.vectors.get(term.toLowerCase());
+    }
+    return vec;
+  }
+
+  /** Adds vec2 to vec1 element-wise, in place, over the length of vec1. */
+  private static void addToVector(double[] vec1, double[] vec2){
+    for(int i = 0; i < vec1.length; i++){
+      vec1[i] += vec2[i];
+    }
+  }
+
+  /**
+   * Context named "Surrounding". Its extract method currently produces no features:
+   * it always returns a new empty list.
+   */
+  public static class Surrounding implements CleartkExtractor.Context {
+
+    @Override
+    public String getName() {
+      return "Surrounding";
+    }
+
+    @Override
+    public <SEARCH_T extends Annotation> List<Feature> extract(JCas jCas, Annotation focusAnnotation, Bounds bounds,
+        Class<SEARCH_T> annotationClass, FeatureExtractor1<SEARCH_T> extractor) throws CleartkExtractorException {
+      List<Feature> feats = new ArrayList<>();
+
+      return feats;
+    }
+
+  }
+}

Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousTextExtractor.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousTextExtractor.java?rev=1748736&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousTextExtractor.java
(added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/extractors/ContinuousTextExtractor.java
Thu Jun 16 14:51:51 2016
@@ -0,0 +1,61 @@
+package org.apache.ctakes.coreference.extractors;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Scanner;
+
+import org.apache.ctakes.core.resource.FileLocator;
+import org.apache.ctakes.typesystem.type.syntax.BaseToken;
+import org.apache.ctakes.utils.distsem.WordEmbeddings;
+import org.apache.ctakes.utils.distsem.WordVector;
+import org.apache.ctakes.utils.distsem.WordVectorReader;
+import org.apache.uima.jcas.JCas;
+import org.cleartk.ml.Feature;
+import org.cleartk.ml.feature.extractor.CleartkExtractorException;
+import org.cleartk.ml.feature.extractor.NamedFeatureExtractor1;
+
+/**
+ * Feature extractor that emits one numeric feature per dimension of the word vector
+ * found for a token's covered text. Tokens without a vector yield no features.
+ */
+public class ContinuousTextExtractor implements NamedFeatureExtractor1<BaseToken> {
+  private WordEmbeddings words = null;
+
+  /**
+   * @param vecFile location of the embeddings, opened through FileLocator.getAsStream
+   * @throws CleartkExtractorException wrapping the IOException raised while loading
+   */
+  public ContinuousTextExtractor(String vecFile) throws CleartkExtractorException {
+    super();
+    try {
+      words = WordVectorReader.getEmbeddings(FileLocator.getAsStream(vecFile));
+    } catch (IOException e) {
+      // The cause travels with the rethrown exception; printing the stack trace here
+      // as well would report the same failure twice.
+      throw new CleartkExtractorException(e);
+    }
+  }
+
+  /**
+   * Looks up the token text as written and, if that is unknown, lowercased.
+   *
+   * @param view the CAS view (not used by this extractor)
+   * @param token token whose covered text is looked up
+   * @return features named getFeatureName() + "_" + i for each vector dimension i, or an
+   *         empty list when neither form of the text has a vector
+   */
+  @Override
+  public List<Feature> extract(JCas view, BaseToken token) throws CleartkExtractorException {
+    List<Feature> feats = new ArrayList<>();
+
+    String key = token.getCoveredText();
+    if(!words.containsKey(key)){
+      key = key.toLowerCase();
+      if(!words.containsKey(key)){
+        return feats;
+      }
+    }
+
+    WordVector vec = words.getVector(key);
+    for(int i = 0; i < vec.size(); i++){
+      feats.add(new Feature(getFeatureName() + "_" + i, vec.getValue(i)));
+    }
+    return feats;
+  }
+
+  @Override
+  public String getFeatureName() {
+    return "ContinuousText";
+  }
+
+}

Added: ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterUtils.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterUtils.java?rev=1748736&view=auto
==============================================================================
--- ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterUtils.java
(added)
+++ ctakes/trunk/ctakes-coreference/src/main/java/org/apache/ctakes/coreference/util/ClusterUtils.java
Thu Jun 16 14:51:51 2016
@@ -0,0 +1,36 @@
+package org.apache.ctakes.coreference.util;
+
+import org.apache.uima.jcas.cas.NonEmptyFSList;
+import org.apache.uima.jcas.tcas.Annotation;
+
+/**
+ * Static helpers for walking a cluster stored as a UIMA NonEmptyFSList of annotations.
+ */
+public class ClusterUtils {
+  /**
+   * Walks the list from its head and returns the last element reached before hitting
+   * one that does not end strictly before the focus annotation's end offset.
+   *
+   * NOTE(review): the head is accepted when it ends exactly at the focus end, while
+   * later elements must end strictly before it -- confirm this asymmetry is intended.
+   *
+   * @param list cluster members; presumably ordered by position in the text -- verify
+   *        against callers
+   * @param focus annotation whose end offset bounds the search
+   * @return the selected element, or null when the head ends after the focus ends
+   */
+  public static Annotation getMostRecent(NonEmptyFSList list, Annotation focus){
+    Annotation best = (Annotation) list.getHead();
+    final int focusEnd = focus.getEnd();
+
+    // The focus annotation comes before every list element: nothing to return.
+    if(best.getEnd() > focusEnd){
+      return null;
+    }
+
+    NonEmptyFSList node = list;
+    while(node.getTail() instanceof NonEmptyFSList){
+      node = (NonEmptyFSList) node.getTail();
+      Annotation candidate = (Annotation) node.getHead();
+      if(candidate.getEnd() >= focusEnd){
+        break;
+      }
+      best = candidate;
+    }
+    return best;
+  }
+
+  /**
+   * Counts the elements of the list by following tails until one is not a NonEmptyFSList.
+   *
+   * @param list list to measure
+   * @return number of elements, at least 1
+   */
+  public static int getSize(NonEmptyFSList list){
+    int count = 1;
+    for(NonEmptyFSList node = list;
+        node.getTail() instanceof NonEmptyFSList;
+        node = (NonEmptyFSList) node.getTail()){
+      count++;
+    }
+    return count;
+  }
+}



Mime
View raw message