mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tdunn...@apache.org
Subject svn commit: r990911 - in /mahout/trunk/core/src: main/java/org/apache/mahout/vectors/InteractionValueEncoder.java test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
Date Mon, 30 Aug 2010 19:34:03 GMT
Author: tdunning
Date: Mon Aug 30 19:34:03 2010
New Revision: 990911

URL: http://svn.apache.org/viewvc?rev=990911&view=rev
Log:
MAHOUT-492 - added unit test coverage of InteractionValueEncoder and modified InteractionValueEncoder
to handle a degenerate hashing condition to avoid unintended hash collisions between hashed
feature interactions and hashed features

Added:
    mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java?rev=990911&r1=990910&r2=990911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Mon Aug 30 19:34:03 2010
@@ -40,25 +40,25 @@ public class InteractionValueEncoder ext
   public void addToVector(String originalForm, double w, Vector data) {
   }
 
-  /**
-   * Adds a value to a vector.
-   *
-   * @param originalForm1 The original form of the first value as a string.
-   * @param originalForm2 The original form of the second value as a string.
-   * @param data          The vector to which the value should be added.
-   */
-  public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) {
-    int probes = getProbes();
-    String name = getName();
-    for (int i = 0; i < probes; i++) {
-      int h1 = hash1(name, originalForm1, i, data.size());
-      int h2 = hash2(name, originalForm1, i, data.size());
-      int j = hash1(name, originalForm2, i, data.size());
-      int n = (h1 + j * h2) % data.size();
-      trace(String.format("%s:%s", originalForm1, originalForm2), n);
-      data.set(n, data.get(n) + 1);
-    }
-  }
+     /**
+      * Adds a value to a vector.
+      *
+      * @param originalForm1 The original form of the first value as a string.
+      * @param originalForm2 The original form of the second value as a string.
+      * @param data          The vector to which the value should be added.
+      */
+     public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) {
+       int probes = getProbes();
+       String name = getName();
+       for (int i = 0; i < probes; i++) {
+         int h1 = hash1(name, originalForm1, i, data.size());
+         int h2 = hash2(name, originalForm1, i, data.size());
+         int j =  hash1(name, originalForm2, i, data.size());
+         int n = (h1 + (j+1)*h2) % data.size();
+         trace(String.format("%s:%s", originalForm1, originalForm2), n);
+         data.set(n, data.get(n) + 1);
+       }
+     }
 
   /**
   * Converts a value into a form that would help a human understand the internals of how the

Added: mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java?rev=990911&view=auto
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java (added)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java Mon Aug 30 19:34:03 2010
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.vectors;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Locale;
+
+import static org.junit.Assert.assertEquals;
+
+public class InteractionValueEncoderTest {
+  @Test
+  public void testAddToVector() {
+    InteractionValueEncoder enc = new InteractionValueEncoder("interactions");
+    Vector v1 = new DenseVector(200);
+    enc.addInteractionToVector("a","b",v1);
+    int k = enc.getProbes();
+    // should set k distinct locations to 1
+    Assert.assertEquals((float) k, v1.norm(1), 0);
+    Assert.assertEquals(1.0, v1.maxValue(), 0);
+    // adding same interaction again should increment weights
+    enc.addInteractionToVector("a","b",v1);
+    Assert.assertEquals((float) k*2, v1.norm(1), 0);
+    Assert.assertEquals(2.0, v1.maxValue(), 0);
+
+    Vector v2 = new DenseVector(20000);
+    StaticWordValueEncoder wordEncoder = new StaticWordValueEncoder("test");
+    enc.addInteractionToVector("a","b",v2);
+    wordEncoder.addToVector("a", v2);
+    wordEncoder.addToVector("b", v2);
+    k = enc.getProbes();
+    int j = wordEncoder.getProbes();
+    //this assumes no hash collision
+    Assert.assertEquals((float) (k + 2*j), v2.norm(1), 0);
+  }
+
+}



Mime
View raw message