Author: tdunning
Date: Mon Aug 30 19:34:03 2010
New Revision: 990911
URL: http://svn.apache.org/viewvc?rev=990911&view=rev
Log:
MAHOUT-492 - added unit test coverage of InteractionValueEncoder and modified InteractionValueEncoder
to handle a degenerate hashing condition to avoid unintended hash collisions between hashed
feature interactions and hashed features
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java?rev=990911&r1=990910&r2=990911&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Mon Aug 30 19:34:03 2010
@@ -40,25 +40,25 @@ public class InteractionValueEncoder ext
public void addToVector(String originalForm, double w, Vector data) {
}
- /**
- * Adds a value to a vector.
- *
- * @param originalForm1 The original form of the first value as a string.
- * @param originalForm2 The original form of the second value as a string.
- * @param data The vector to which the value should be added.
- */
- public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) {
- int probes = getProbes();
- String name = getName();
- for (int i = 0; i < probes; i++) {
- int h1 = hash1(name, originalForm1, i, data.size());
- int h2 = hash2(name, originalForm1, i, data.size());
- int j = hash1(name, originalForm2, i, data.size());
- int n = (h1 + j * h2) % data.size();
- trace(String.format("%s:%s", originalForm1, originalForm2), n);
- data.set(n, data.get(n) + 1);
- }
- }
+  /**
+   * Adds the interaction of two values to a vector.
+   *
+   * For each probe, one slot of {@code data} is chosen by combining {@code hash1} and
+   * {@code hash2} of the first value with {@code hash1} of the second value, and that slot is
+   * incremented by one.
+   *
+   * @param originalForm1 The original form of the first value as a string.
+   * @param originalForm2 The original form of the second value as a string.
+   * @param data          The vector to which the value should be added.
+   */
+  public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) {
+    int probes = getProbes();
+    String name = getName();
+    int size = data.size();
+    for (int i = 0; i < probes; i++) {
+      int h1 = hash1(name, originalForm1, i, size);
+      int h2 = hash2(name, originalForm1, i, size);
+      int j = hash1(name, originalForm2, i, size);
+      // The multiplier is (j + 1) rather than j so that h2 always contributes: with a
+      // multiplier of zero the slot would collapse to h1 alone, i.e. depend only on the
+      // first value (MAHOUT-492).
+      int n = (h1 + (j + 1) * h2) % size;
+      // The int arithmetic above can overflow for large vectors, and Java's % keeps the sign
+      // of the dividend, so fold a negative remainder back into [0, size).
+      if (n < 0) {
+        n += size;
+      }
+      trace(String.format("%s:%s", originalForm1, originalForm2), n);
+      data.set(n, data.get(n) + 1);
+    }
+  }
/**
* Converts a value into a form that would help a human understand the internals of how the
Added: mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java?rev=990911&view=auto
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java (added)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectors/InteractionValueEncoderTest.java Mon Aug 30 19:34:03 2010
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.vectors;
+
+import com.google.common.collect.ImmutableMap;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.Locale;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Unit tests for {@link InteractionValueEncoder#addInteractionToVector}.
+ */
+public class InteractionValueEncoderTest {
+
+  /**
+   * Encoding one interaction should set one slot per probe, and encoding the same interaction
+   * again should increment those same slots.
+   */
+  @Test
+  public void testAddToVector() {
+    InteractionValueEncoder enc = new InteractionValueEncoder("interactions");
+    Vector v1 = new DenseVector(200);
+    enc.addInteractionToVector("a", "b", v1);
+    int k = enc.getProbes();
+    // should set k distinct locations to 1
+    Assert.assertEquals(k, v1.norm(1), 0.0);
+    Assert.assertEquals(1.0, v1.maxValue(), 0.0);
+    // adding same interaction again should increment weights
+    enc.addInteractionToVector("a", "b", v1);
+    Assert.assertEquals(2.0 * k, v1.norm(1), 0.0);
+    Assert.assertEquals(2.0, v1.maxValue(), 0.0);
+  }
+
+  /**
+   * The interaction "a":"b" should not land in the slots used by the plain word features "a"
+   * and "b" when all three are encoded into the same vector.
+   */
+  @Test
+  public void testInteractionDoesNotCollideWithWordFeatures() {
+    InteractionValueEncoder enc = new InteractionValueEncoder("interactions");
+    StaticWordValueEncoder wordEncoder = new StaticWordValueEncoder("test");
+    Vector v2 = new DenseVector(20000);
+    enc.addInteractionToVector("a", "b", v2);
+    wordEncoder.addToVector("a", v2);
+    wordEncoder.addToVector("b", v2);
+    int k = enc.getProbes();
+    int j = wordEncoder.getProbes();
+    // This assumes no hash collision among the encoded slots for these particular inputs in a
+    // vector of this size; the expected total is one unit per interaction probe plus one unit
+    // per probe for each of the two words.
+    Assert.assertEquals(k + 2.0 * j, v2.norm(1), 0.0);
+  }
+
+}
|