Return-Path: Delivered-To: apmail-mahout-commits-archive@www.apache.org Received: (qmail 95505 invoked from network); 24 May 2010 22:45:20 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 24 May 2010 22:45:20 -0000 Received: (qmail 83783 invoked by uid 500); 24 May 2010 22:45:20 -0000 Delivered-To: apmail-mahout-commits-archive@mahout.apache.org Received: (qmail 83734 invoked by uid 500); 24 May 2010 22:45:20 -0000 Mailing-List: contact commits-help@mahout.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@mahout.apache.org Delivered-To: mailing list commits@mahout.apache.org Received: (qmail 83727 invoked by uid 99); 24 May 2010 22:45:20 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 May 2010 22:45:20 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 May 2010 22:45:16 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id C475123889E1; Mon, 24 May 2010 22:44:53 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r947844 [2/2] - in /mahout/trunk: ./ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/ core/src/main/java/org/apache... Date: Mon, 24 May 2010 22:44:53 -0000 To: commits@mahout.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100524224453.C475123889E1@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,24 +20,24 @@ package org.apache.mahout.cf.taste.hadoo import java.io.IOException; import java.util.Iterator; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; +import org.apache.mahout.math.VarIntWritable; +import org.apache.mahout.math.VarLongWritable; /** * counts all unique users, we ensure that we see userIDs sorted in ascending order via * secondary sort, so we don't have to buffer all of them */ public class CountUsersReducer extends MapReduceBase - implements Reducer { + implements Reducer { @Override - public void reduce(CountUsersKeyWritable key, Iterator userIDs, - OutputCollector out, Reporter reporter) + public void reduce(CountUsersKeyWritable key, Iterator userIDs, + OutputCollector out, Reporter reporter) throws IOException { long lastSeenUserID = Long.MIN_VALUE; @@ -50,7 +50,7 @@ public class CountUsersReducer extends M numberOfUsers++; } } - out.collect(new IntWritable(numberOfUsers), NullWritable.get()); + out.collect(new VarIntWritable(numberOfUsers), NullWritable.get()); } } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -29,9 +29,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SequenceFileInputFormat; @@ -46,6 +44,8 @@ import org.apache.mahout.cf.taste.hadoop import org.apache.mahout.cf.taste.hadoop.similarity.CoRating; import org.apache.mahout.cf.taste.hadoop.similarity.DistributedItemSimilarity; import org.apache.mahout.common.AbstractJob; +import org.apache.mahout.math.VarIntWritable; +import org.apache.mahout.math.VarLongWritable; /** *

Runs a completely distributed computation of the cosine distance of the itemvectors of the user-item-matrix @@ -143,9 +143,9 @@ public final class ItemSimilarityJob ext TextInputFormat.class, CountUsersMapper.class, CountUsersKeyWritable.class, - VLongWritable.class, + VarLongWritable.class, CountUsersReducer.class, - IntWritable.class, + VarIntWritable.class, NullWritable.class, TextOutputFormat.class); @@ -163,10 +163,10 @@ public final class ItemSimilarityJob ext itemVectorsPath, TextInputFormat.class, ToUserPrefsMapper.class, - VLongWritable.class, + VarLongWritable.class, EntityPrefWritable.class, ToItemVectorReducer.class, - VLongWritable.class, + VarLongWritable.class, EntityPrefWritableArrayWritable.class, SequenceFileOutputFormat.class); JobClient.runJob(itemVectors); @@ -175,10 +175,10 @@ public final class ItemSimilarityJob ext userVectorsPath, SequenceFileInputFormat.class, PreferredItemsPerUserMapper.class, - VLongWritable.class, + VarLongWritable.class, ItemPrefWithItemVectorWeightWritable.class, PreferredItemsPerUserReducer.class, - VLongWritable.class, + VarLongWritable.class, ItemPrefWithItemVectorWeightArrayWritable.class, SequenceFileOutputFormat.class); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.Iterator; import java.util.NoSuchElementException; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; @@ -30,13 +29,14 @@ import org.apache.hadoop.mapred.Reporter import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable; import org.apache.mahout.cf.taste.hadoop.similarity.DistributedItemSimilarity; +import org.apache.mahout.math.VarLongWritable; /** * for each item-vector, we compute its weight here and map out all entries with the user as key, * so we can create the user-vectors in the reducer */ public final class PreferredItemsPerUserMapper extends MapReduceBase - implements Mapper { + implements Mapper { private DistributedItemSimilarity distributedSimilarity; @@ -48,9 +48,9 @@ public final class PreferredItemsPerUser } @Override - public void map(VLongWritable item, + public void map(VarLongWritable item, EntityPrefWritableArrayWritable userPrefsArray, - OutputCollector output, + OutputCollector output, Reporter reporter) throws IOException { EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs(); @@ -58,7 +58,7 @@ public final class PreferredItemsPerUser double weight = distributedSimilarity.weightOfItemVector(new UserPrefsIterator(userPrefs)); for (EntityPrefWritable userPref : userPrefs) { - output.collect(new VLongWritable(userPref.getID()), + output.collect(new VarLongWritable(userPref.getID()), new ItemPrefWithItemVectorWeightWritable(item.get(), weight, userPref.getPrefValue())); } } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -22,19 +22,19 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; +import org.apache.mahout.math.VarLongWritable; public final class PreferredItemsPerUserReducer extends MapReduceBase - implements Reducer { + implements Reducer { @Override - public void reduce(VLongWritable user, + public void reduce(VarLongWritable user, Iterator itemPrefs, - OutputCollector output, + OutputCollector output, Reporter reporter) throws IOException { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -22,13 +22,13 @@ import java.util.HashSet; import java.util.Iterator; import java.util.Set; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable; +import org.apache.mahout.math.VarLongWritable; /** * For each single item, collect all users with their preferences @@ -36,12 +36,12 @@ import org.apache.mahout.cf.taste.hadoop */ public final class ToItemVectorReducer extends MapReduceBase implements - Reducer { + Reducer { @Override - public void reduce(VLongWritable item, + public void reduce(VarLongWritable item, Iterator userPrefs, - OutputCollector output, + OutputCollector output, Reporter reporter) throws IOException { Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -22,7 +22,6 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.JobClient; @@ -37,6 +36,7 @@ import org.apache.mahout.cf.taste.hadoop import org.apache.mahout.common.AbstractJob; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper; +import org.apache.mahout.math.VarLongWritable; public final class SlopeOneAverageDiffsJob extends AbstractJob { @@ -54,7 +54,7 @@ public final class SlopeOneAverageDiffsJ String averagesOutputPath = parsedArgs.get("--tempDir"); JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile, averagesOutputPath, - TextInputFormat.class, ToItemPrefsMapper.class, VLongWritable.class, EntityPrefWritable.class, + TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, EntityPrefWritable.class, SlopeOnePrefsToDiffsReducer.class, EntityEntityWritable.class, FloatWritable.class, SequenceFileOutputFormat.class); JobClient.runJob(prefsToDiffsJobConf); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -24,19 +24,19 @@ import java.util.Iterator; import java.util.List; import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; +import org.apache.mahout.math.VarLongWritable; public final class SlopeOnePrefsToDiffsReducer extends MapReduceBase implements - Reducer { + Reducer { @Override - public void reduce(VLongWritable key, + public void reduce(VarLongWritable key, Iterator values, OutputCollector output, Reporter reporter) throws IOException { Added: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java?rev=947844&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java Mon May 24 22:44:51 2010 @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.WritableComparable; + +public class VarIntWritable implements WritableComparable, Cloneable { + + private int value; + + public VarIntWritable() { + } + + public VarIntWritable(int value) { + this.value = value; + } + + public int get() { + return value; + } + + public void set(int value) { + this.value = value; + } + + @Override + public boolean equals(Object other) { + return other instanceof VarIntWritable && ((VarIntWritable) other).value == value; + } + + @Override + public int hashCode() { + return value; + } + + @Override + public String toString() { + return String.valueOf(value); + } + + @Override + public VarIntWritable clone() { + return new VarIntWritable(value); + } + + @Override + public int compareTo(VarIntWritable other) { + if (value < other.value) { + return -1; + } else if (value > other.value) { + return 1; + } + return 0; + } + + @Override + public void write(DataOutput out) throws IOException { + Varint.writeSignedVarInt(value, out); + } + + @Override + public void readFields(DataInput in) throws IOException { + value = Varint.readSignedVarInt(in); + } + +} Added: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java?rev=947844&view=auto ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java (added) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java Mon May 24 22:44:51 2010 @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.WritableComparable; + +public class VarLongWritable implements WritableComparable, Cloneable { + + private long value; + + public VarLongWritable() { + } + + public VarLongWritable(long value) { + this.value = value; + } + + public long get() { + return value; + } + + public void set(long value) { + this.value = value; + } + + @Override + public boolean equals(Object other) { + return other instanceof VarLongWritable && ((VarLongWritable) other).value == value; + } + + @Override + public int hashCode() { + return (int) ((value >>> 32) ^ value); + } + + @Override + public String toString() { + return String.valueOf(value); + } + + @Override + public VarLongWritable clone() { + return new VarLongWritable(value); + } + + @Override + public int compareTo(VarLongWritable other) { + if (value < other.value) { + return -1; + } else if (value > other.value) { + return 1; + } + return 0; + } + + @Override + public void write(DataOutput out) throws IOException { + Varint.writeSignedVarLong(value, out); + } + + @Override + public void readFields(DataInput in) throws IOException { + value = Varint.readSignedVarLong(in); + } + +} \ No newline at end of file Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Mon May 24 22:44:51 2010 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -30,11 +30,9 @@ import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.VLongWritable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; @@ -44,6 +42,8 @@ import org.apache.mahout.cf.taste.hadoop import org.apache.mahout.cf.taste.hadoop.similarity.CoRating; import org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity; import org.apache.mahout.common.MahoutTestCase; +import org.apache.mahout.math.VarIntWritable; +import org.apache.mahout.math.VarLongWritable; import org.easymock.IArgumentMatcher; import org.easymock.classextension.EasyMock; @@ -55,9 +55,9 @@ import org.easymock.classextension.EasyM public final class ItemSimilarityTest extends MahoutTestCase { public void testUserPrefsPerItemMapper() throws Exception { - OutputCollector output = + OutputCollector output = EasyMock.createMock(OutputCollector.class); - output.collect(new VLongWritable(34L), new EntityPrefWritable(12L, 2.3f)); + output.collect(new VarLongWritable(34L), new EntityPrefWritable(12L, 2.3f)); EasyMock.replay(output); new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null); @@ -66,9 +66,9 @@ public final class ItemSimilarityTest ex } public void testCountUsersMapper() throws Exception { - OutputCollector output = EasyMock.createMock(OutputCollector.class); - output.collect(keyForUserID(12L), EasyMock.eq(new VLongWritable(12L))); - output.collect(keyForUserID(35L), EasyMock.eq(new VLongWritable(35L))); + OutputCollector output = EasyMock.createMock(OutputCollector.class); + output.collect(keyForUserID(12L), EasyMock.eq(new VarLongWritable(12L))); + output.collect(keyForUserID(35L), EasyMock.eq(new VarLongWritable(35L))); EasyMock.replay(output); CountUsersMapper mapper = new CountUsersMapper(); @@ -98,13 +98,13 @@ public final class ItemSimilarityTest ex public void testCountUsersReducer() throws Exception { - OutputCollector output = EasyMock.createMock(OutputCollector.class); - output.collect(new IntWritable(3), NullWritable.get()); + OutputCollector output = EasyMock.createMock(OutputCollector.class); + output.collect(new VarIntWritable(3), NullWritable.get()); EasyMock.replay(output); - List userIDs = Arrays.asList(new VLongWritable(1L), new VLongWritable(1L), - new VLongWritable(3L), new VLongWritable(5L), - new VLongWritable(5L), new VLongWritable(5L)); + List userIDs = Arrays.asList(new VarLongWritable(1L), new VarLongWritable(1L), + new VarLongWritable(3L), new VarLongWritable(5L), + new VarLongWritable(5L), new VarLongWritable(5L)); new CountUsersReducer().reduce(null, userIDs.iterator(), output, null); @@ -116,14 +116,14 @@ public final class ItemSimilarityTest ex List userPrefs = Arrays.asList( new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f)); - OutputCollector output = + OutputCollector output = EasyMock.createMock(OutputCollector.class); - output.collect(EasyMock.eq(new VLongWritable(12L)), equalToUserPrefs(userPrefs)); + output.collect(EasyMock.eq(new VarLongWritable(12L)), equalToUserPrefs(userPrefs)); EasyMock.replay(output); - new ToItemVectorReducer().reduce(new VLongWritable(12L), userPrefs.iterator(), output, null); + new ToItemVectorReducer().reduce(new VarLongWritable(12L), userPrefs.iterator(), output, null); EasyMock.verify(output); } @@ -162,7 +162,7 @@ public final class ItemSimilarityTest ex } public void testPreferredItemsPerUserMapper() throws Exception { - OutputCollector output = + OutputCollector output = EasyMock.createMock(OutputCollector.class); EntityPrefWritableArrayWritable userPrefs = new EntityPrefWritableArrayWritable( new EntityPrefWritable[] { @@ -172,8 +172,8 @@ public final class ItemSimilarityTest ex double weight = new DistributedUncenteredZeroAssumingCosineSimilarity().weightOfItemVector(Arrays.asList(2.0f, 3.0f).iterator()); - output.collect(new VLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f)); - output.collect(new VLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f)); + output.collect(new VarLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f)); + output.collect(new VarLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f)); JobConf conf = new JobConf(); conf.set(ItemSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME, @@ -183,7 +183,7 @@ public final class ItemSimilarityTest ex PreferredItemsPerUserMapper mapper = new PreferredItemsPerUserMapper(); mapper.configure(conf); - mapper.map(new VLongWritable(34L), userPrefs, output, null); + mapper.map(new VarLongWritable(34L), userPrefs, output, null); EasyMock.verify(output); } @@ -194,15 +194,15 @@ public final class ItemSimilarityTest ex Arrays.asList(new ItemPrefWithItemVectorWeightWritable(34L, 5.0, 1.0f), new ItemPrefWithItemVectorWeightWritable(56L, 7.0, 2.0f)); - OutputCollector output = + OutputCollector output = EasyMock.createMock(OutputCollector.class); - output.collect(EasyMock.eq(new VLongWritable(12L)), equalToItemPrefs(itemPrefs)); + output.collect(EasyMock.eq(new VarLongWritable(12L)), equalToItemPrefs(itemPrefs)); EasyMock.replay(output); new PreferredItemsPerUserReducer().reduce( - new VLongWritable(12L), itemPrefs.iterator(), output, null); + new VarLongWritable(12L), itemPrefs.iterator(), output, null); EasyMock.verify(output); } @@ -254,7 +254,7 @@ public final class ItemSimilarityTest ex EasyMock.replay(output, itemPrefs); - new CopreferredItemsMapper().map(new VLongWritable(), itemPrefs, output, null); + new CopreferredItemsMapper().map(new VarLongWritable(), itemPrefs, output, null); EasyMock.verify(output, itemPrefs); } @@ -282,97 +282,78 @@ public final class ItemSimilarityTest ex public void testCompleteJob() throws Exception { - String tmpDirProp = System.getProperty("java.io.tmpdir"); - if (!tmpDirProp.endsWith("/")) { - tmpDirProp += "/"; - } - String tmpDirPath = tmpDirProp + ItemSimilarityTest.class.getCanonicalName(); - File tmpDir = new File(tmpDirPath); + File inputFile = getTestTempFile("prefs.txt"); + File outputDir = getTestTempDir("output"); + outputDir.delete(); + File tmpDir = getTestTempDir("tmp"); + + /* user-item-matrix + + Game Mouse PC Disk + Jane - 1 2 - + Paul 1 - 1 - + Fred - - - 1 + */ + BufferedWriter writer = new BufferedWriter(new FileWriter(inputFile)); try { - if (tmpDir.exists()) { - recursiveDelete(tmpDir); - } - tmpDir.mkdirs(); - - /* user-item-matrix - - Game Mouse PC Disk - Jane - 1 2 - - Paul 1 - 1 - - Fred - - - 1 - */ - - BufferedWriter writer = new BufferedWriter(new FileWriter(tmpDirPath+"/prefs.txt")); - try { - writer.write("2,1,1\n" + - "1,2,1\n" + - "3,4,1\n" + - "1,3,2\n" + - "2,3,1\n"); - } finally { - writer.close(); - } - - ItemSimilarityJob similarityJob = new ItemSimilarityJob(); - - Configuration conf = new Configuration(); - conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt"); - conf.set("mapred.output.dir", tmpDirPath+"/output"); - conf.set("mapred.output.compress", Boolean.FALSE.toString()); + writer.write("2,1,1\n" + + "1,2,1\n" + + "3,4,1\n" + + "1,3,2\n" + + "2,3,1\n"); + } finally { + writer.close(); + } - similarityJob.setConf(conf); + ItemSimilarityJob similarityJob = new ItemSimilarityJob(); - similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp", "--similarityClassname", - "org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity"}); + Configuration conf = new Configuration(); + conf.set("mapred.input.dir", inputFile.getAbsolutePath()); + conf.set("mapred.output.dir", outputDir.getAbsolutePath()); + conf.set("mapred.output.compress", Boolean.FALSE.toString()); - int numberOfUsers = ItemSimilarityJob.readNumberOfUsers(new JobConf(), tmpDirPath + "/tmp/countUsers/part-00000"); + similarityJob.setConf(conf); - assertEquals(3, numberOfUsers); + similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname", + "org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity"}); - String filePath = tmpDirPath+"/output/part-00000"; - BufferedReader reader = new BufferedReader(new FileReader(filePath)); + File countUsersPart = new File(new File(tmpDir, "countUsers"), "part-00000"); + int numberOfUsers = ItemSimilarityJob.readNumberOfUsers(new JobConf(), countUsersPart.getAbsolutePath()); - String line; - int currentLine = 1; - while ( (line = reader.readLine()) != null) { + assertEquals(3, numberOfUsers); - String[] tokens = line.split("\t"); + File outPart = new File(outputDir, "part-00000"); + BufferedReader reader = new BufferedReader(new FileReader(outPart)); - long itemAID = Long.parseLong(tokens[0]); - long itemBID = Long.parseLong(tokens[1]); - double similarity = Double.parseDouble(tokens[2]); + String line; + int currentLine = 1; + while ( (line = reader.readLine()) != null) { - if (currentLine == 1) { - assertEquals(1L, itemAID); - assertEquals(3L, itemBID); - assertEquals(0.45, similarity, 0.01); - } + String[] tokens = line.split("\t"); - if (currentLine == 2) { - assertEquals(2L, itemAID); - assertEquals(3L, itemBID); - assertEquals(0.89, similarity, 0.01); - } + long itemAID = Long.parseLong(tokens[0]); + long itemBID = Long.parseLong(tokens[1]); + double similarity = Double.parseDouble(tokens[2]); - currentLine++; + if (currentLine == 1) { + assertEquals(1L, itemAID); + assertEquals(3L, itemBID); + assertEquals(0.45, similarity, 0.01); } - int linesWritten = currentLine-1; - assertEquals(2, linesWritten); + if (currentLine == 2) { + assertEquals(2L, itemAID); + assertEquals(3L, itemBID); + assertEquals(0.89, similarity, 0.01); + } - } finally { - recursiveDelete(tmpDir); + currentLine++; } - } - static void recursiveDelete(File fileOrDir) { - if (fileOrDir.isDirectory()) { - for (File innerFile : fileOrDir.listFiles()) { - recursiveDelete(innerFile); - } - } - fileOrDir.delete(); + int linesWritten = currentLine-1; + assertEquals(2, linesWritten); + } } Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java Mon May 24 22:44:51 2010 @@ -26,7 +26,7 @@ import java.nio.charset.CharacterCodingE import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.WritableUtils; +import org.apache.mahout.math.Varint; /** * Writable for holding data generated from the collocation discovery jobs. Depending on the job configuration @@ -168,19 +168,19 @@ public class Gram extends BinaryComparab @Override public void readFields(DataInput in) throws IOException { - int newLength = WritableUtils.readVInt(in); + int newLength = Varint.readUnsignedVarInt(in); setCapacity(newLength, false); in.readFully(bytes, 0, newLength); - int newFrequency = WritableUtils.readVInt(in); + int newFrequency = Varint.readUnsignedVarInt(in); length = newLength; frequency = newFrequency; } @Override public void write(DataOutput out) throws IOException { - WritableUtils.writeVInt(out, length); + Varint.writeUnsignedVarInt(length, out); out.write(bytes, 0, length); - WritableUtils.writeVInt(out, frequency); + Varint.writeUnsignedVarInt(frequency, out); } /* Cribbed from o.a.hadoop.io.Text: Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=947844&r1=947843&r2=947844&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java Mon May 24 22:44:51 2010 @@ -25,7 +25,7 @@ import java.nio.charset.CharacterCodingE import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.WritableUtils; +import org.apache.mahout.math.Varint; import org.apache.mahout.utils.nlp.collocations.llr.Gram.Type; /** A GramKey, based on the identity fields of Gram (type, string) plus a byte[] used for secondary ordering */ @@ -76,8 +76,8 @@ public class GramKey extends BinaryCompa @Override public void readFields(DataInput in) throws IOException { - int newLength = WritableUtils.readVInt(in); - int newPrimaryLength = WritableUtils.readVInt(in); + int newLength = Varint.readUnsignedVarInt(in); + int newPrimaryLength = Varint.readUnsignedVarInt(in); setCapacity(newLength, false); in.readFully(bytes, 0, newLength); length = newLength; @@ -87,8 +87,8 @@ public class GramKey extends BinaryCompa @Override public void write(DataOutput out) throws IOException { - WritableUtils.writeVInt(out, length); - WritableUtils.writeVInt(out, primaryLength); + Varint.writeUnsignedVarInt(length, out); + Varint.writeUnsignedVarInt(primaryLength, out); out.write(bytes, 0, length); }