Return-Path: Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: (qmail 59039 invoked from network); 20 Aug 2010 03:23:48 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 20 Aug 2010 03:23:48 -0000 Received: (qmail 99939 invoked by uid 500); 20 Aug 2010 03:23:47 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 99831 invoked by uid 500); 20 Aug 2010 03:23:45 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 99824 invoked by uid 99); 20 Aug 2010 03:23:43 -0000 Received: from Unknown (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 Aug 2010 03:23:43 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 20 Aug 2010 03:23:26 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id B809023889E1; Fri, 20 Aug 2010 03:22:07 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r987368 - /mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Date: Fri, 20 Aug 2010 03:22:07 -0000 To: commits@mahout.apache.org From: tdunning@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100820032207.B809023889E1@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tdunning Date: Fri Aug 20 03:22:06 2010 New Revision: 987368 URL: http://svn.apache.org/viewvc?rev=987368&view=rev Log: added InteractionValueEncoder for encoding interaction of two categorical features into feature vectors Added: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Added: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java?rev=987368&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Fri Aug 20 03:22:06 2010 @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.vectors; + +import org.apache.mahout.math.Vector; + +public class InteractionValueEncoder extends FeatureVectorEncoder { + + protected static final int INTERACTION_VALUE_HASH_SEED_1 = 100; + protected static final int INTERACTION_VALUE_HASH_SEED_2 = 200; + + protected InteractionValueEncoder(String name) { + super(name, 2); + } + + /** + * Adds a value to a vector. + * + * @param originalForm The original form of the first value as a string. + * @param data The vector to which the value should be added. + */ + @Override + public void addToVector(String originalForm, double w, Vector data) { + } + + /** + * Adds a value to a vector. + * + * @param originalForm1 The original form of the first value as a string. + * @param originalForm2 The original form of the second value as a string. + * @param data The vector to which the value should be added. + */ + public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) { + int probes = getProbes(); + String name = getName(); + for (int i = 0; i < probes; i++) { + int h1 = hash1(name, originalForm1, i, data.size()); + int h2 = hash2(name, originalForm1, i, data.size()); + int j = hash1(name, originalForm2, i, data.size()); + int n = (h1 + j*h2) % data.size(); + trace(String.format("%s:%s", originalForm1, originalForm2), n); + data.set(n, data.get(n) + 1); + } + } + + /** + * Converts a value into a form that would help a human understand the internals of how the value + * is being interpreted. For text-like things, this is likely to be a list of the terms found with + * associated weights (if any). + * + * @param originalForm The original form of the value as a string. + * @return A string that a human can read. + */ + @Override + public String asString(String originalForm) { + return String.format("%s:%s", getName(), originalForm); + } + + protected int hash1(String term1, String term2, int probe, int numFeatures) { + return hash(term1, term2, probe+INTERACTION_VALUE_HASH_SEED_1,numFeatures); + } + + protected int hash2(String term1, String term2, int probe, int numFeatures) { + return hash(term1, term2, probe+INTERACTION_VALUE_HASH_SEED_2,numFeatures); + } +} +