mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tdunn...@apache.org
Subject svn commit: r990910 - /mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
Date Mon, 30 Aug 2010 19:34:00 GMT
Author: tdunning
Date: Mon Aug 30 19:34:00 2010
New Revision: 990910

URL: http://svn.apache.org/viewvc?rev=990910&view=rev
Log:
MAHOUT-492 - added InteractionValueEncoder for encoding interaction of two categorical features
into feature vectors

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java?rev=990910&r1=990909&r2=990910&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java	(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java	Mon Aug 30 19:34:00 2010
@@ -25,13 +25,9 @@ public class InteractionValueEncoder ext
 
   protected static final int INTERACTION_VALUE_HASH_SEED_1 = 100;
   protected static final int INTERACTION_VALUE_HASH_SEED_2 = 200;
-  private final String name1;
-  private final String name2;
 
-  protected InteractionValueEncoder(String name1, String name2) {
-    super(name1 + ':' + name2, 2);
-    this.name1 = name1;
-    this.name2 = name2;
+  protected InteractionValueEncoder(String name) {
+    super(name, 2);
   }
 
   /**
@@ -42,7 +38,6 @@ public class InteractionValueEncoder ext
    */
   @Override
   public void addToVector(String originalForm, double w, Vector data) {
-    throw new UnsupportedOperationException("Must have two arguments to encode interaction");
   }
 
   /**
@@ -52,11 +47,15 @@ public class InteractionValueEncoder ext
    * @param originalForm2 The original form of the second value as a string.
    * @param data          The vector to which the value should be added.
    */
-  public void addToVector(String originalForm1, String originalForm2, Vector data) {
+  public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) {
     int probes = getProbes();
+    String name = getName();
     for (int i = 0; i < probes; i++) {
-      int n = hash(name1, originalForm1, name2, originalForm2, i, data.size());
-      trace(String.format(Locale.ENGLISH, "%s:%s", originalForm1, originalForm2), n);
+      int h1 = hash1(name, originalForm1, i, data.size());
+      int h2 = hash2(name, originalForm1, i, data.size());
+      int j = hash1(name, originalForm2, i, data.size());
+      int n = (h1 + j * h2) % data.size();
+      trace(String.format("%s:%s", originalForm1, originalForm2), n);
       data.set(n, data.get(n) + 1);
     }
   }
@@ -73,5 +72,13 @@ public class InteractionValueEncoder ext
   public String asString(String originalForm) {
     return String.format(Locale.ENGLISH, "%s:%s", getName(), originalForm);
   }
+
+  protected int hash1(String term1, String term2, int probe, int numFeatures) {
+    return hash(term1, term2, probe + INTERACTION_VALUE_HASH_SEED_1, numFeatures);
+  }
+
+  protected int hash2(String term1, String term2, int probe, int numFeatures) {
+    return hash(term1, term2, probe + INTERACTION_VALUE_HASH_SEED_2, numFeatures);
+  }
 }
 



Mime
View raw message