ctakes-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tm...@apache.org
Subject svn commit: r1750711 - /ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/distsem/WordEmbeddings.java
Date Wed, 29 Jun 2016 20:15:07 GMT
Author: tmill
Date: Wed Jun 29 20:15:07 2016
New Revision: 1750711

URL: http://svn.apache.org/viewvc?rev=1750711&view=rev
Log:
Added method for getting mean vector of embeddings.

Modified:
    ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/distsem/WordEmbeddings.java

Modified: ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/distsem/WordEmbeddings.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/distsem/WordEmbeddings.java?rev=1750711&r1=1750710&r2=1750711&view=diff
==============================================================================
--- ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/distsem/WordEmbeddings.java	(original)
+++ ctakes/trunk/ctakes-utils/src/main/java/org/apache/ctakes/utils/distsem/WordEmbeddings.java	Wed Jun 29 20:15:07 2016
@@ -10,10 +10,14 @@ public class WordEmbeddings {
 
   private Map<String,WordVector> vectors = null;
   private int dimensionality = 0;
+  private WordVector meanVector = null;
+  private WordVector rawMeanVector = null;
   
   public WordEmbeddings(int dim){
     this.vectors = new HashMap<>();
     this.dimensionality = dim;
+    this.meanVector = new WordVector("_mean_", new double[this.dimensionality]);
+    this.rawMeanVector = new WordVector("_mean_raw", new double[this.dimensionality]);
   }
   
   public WordEmbeddings(Map<String,WordVector> vectors){
@@ -38,9 +42,16 @@ public class WordEmbeddings {
     int wordBreak = line.indexOf(' ');
     String word = line.substring(0, wordBreak);
     String[] dims = line.substring(wordBreak+1).split(" ");
+    
+    if(this.meanVector == null){
+      this.meanVector = new WordVector("_mean_", new double[dims.length]);
+      this.rawMeanVector = new WordVector("_mean_raw", new double[dims.length]);
+    }
+    
     double[] vector = new double[dims.length];
     for(int i = 0; i < dims.length; i++){
       vector[i] = Double.valueOf(dims[i]);
+      meanVector.vector[i] += vector[i];
     }
     vectors.put(word, new WordVector(word, vector));
   }
@@ -101,4 +112,11 @@ public class WordEmbeddings {
     }
     return words;
   }
+  
+  public WordVector getMeanVector(){
+    for(int i = 0; i < this.rawMeanVector.getLength(); i++){
+      this.meanVector.vector[i] = this.rawMeanVector.vector[i] / vectors.size();
+    }
+    return this.meanVector;
+  }
 }



Mime
View raw message