mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r947844 [2/2] - in /mahout/trunk: ./ core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/ core/src/main/java/org/apache...
Date Mon, 24 May 2010 22:44:53 GMT
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,24 +20,24 @@ package org.apache.mahout.cf.taste.hadoo
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
 
 /**
  * counts all unique users, we ensure that we see userIDs sorted in ascending order via
  * secondary sort, so we don't have to buffer all of them
  */
 public class CountUsersReducer extends MapReduceBase
-    implements Reducer<CountUsersKeyWritable,VLongWritable,IntWritable,NullWritable> {
+    implements Reducer<CountUsersKeyWritable,VarLongWritable, VarIntWritable,NullWritable> {
 
   @Override
-  public void reduce(CountUsersKeyWritable key, Iterator<VLongWritable> userIDs,
-      OutputCollector<IntWritable,NullWritable> out, Reporter reporter)
+  public void reduce(CountUsersKeyWritable key, Iterator<VarLongWritable> userIDs,
+      OutputCollector<VarIntWritable,NullWritable> out, Reporter reporter)
       throws IOException {
 
     long lastSeenUserID = Long.MIN_VALUE;
@@ -50,7 +50,7 @@ public class CountUsersReducer extends M
         numberOfUsers++;
       }
     }
-    out.collect(new IntWritable(numberOfUsers), NullWritable.get());
+    out.collect(new VarIntWritable(numberOfUsers), NullWritable.get());
   }
 
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -29,9 +29,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
@@ -46,6 +44,8 @@ import org.apache.mahout.cf.taste.hadoop
 import org.apache.mahout.cf.taste.hadoop.similarity.CoRating;
 import org.apache.mahout.cf.taste.hadoop.similarity.DistributedItemSimilarity;
 import org.apache.mahout.common.AbstractJob;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
 
 /**
  * <p>Runs a completely distributed computation of the cosine distance of the itemvectors of the user-item-matrix
@@ -143,9 +143,9 @@ public final class ItemSimilarityJob ext
                                          TextInputFormat.class,
                                          CountUsersMapper.class,
                                          CountUsersKeyWritable.class,
-                                         VLongWritable.class,
+                                         VarLongWritable.class,
                                          CountUsersReducer.class,
-                                         IntWritable.class,
+                                         VarIntWritable.class,
                                          NullWritable.class,
                                          TextOutputFormat.class);
 
@@ -163,10 +163,10 @@ public final class ItemSimilarityJob ext
                                          itemVectorsPath,
                                          TextInputFormat.class,
                                          ToUserPrefsMapper.class,
-                                         VLongWritable.class,
+                                         VarLongWritable.class,
                                          EntityPrefWritable.class,
                                          ToItemVectorReducer.class,
-                                         VLongWritable.class,
+                                         VarLongWritable.class,
                                          EntityPrefWritableArrayWritable.class,
                                          SequenceFileOutputFormat.class);
     JobClient.runJob(itemVectors);
@@ -175,10 +175,10 @@ public final class ItemSimilarityJob ext
                                          userVectorsPath,
                                          SequenceFileInputFormat.class,
                                          PreferredItemsPerUserMapper.class,
-                                         VLongWritable.class,
+                                         VarLongWritable.class,
                                          ItemPrefWithItemVectorWeightWritable.class,
                                          PreferredItemsPerUserReducer.class,
-                                         VLongWritable.class,
+                                         VarLongWritable.class,
                                          ItemPrefWithItemVectorWeightArrayWritable.class,
                                          SequenceFileOutputFormat.class);
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -30,13 +29,14 @@ import org.apache.hadoop.mapred.Reporter
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
 import org.apache.mahout.cf.taste.hadoop.similarity.DistributedItemSimilarity;
+import org.apache.mahout.math.VarLongWritable;
 
 /**
  * for each item-vector, we compute its weight here and map out all entries with the user as key,
  * so we can create the user-vectors in the reducer
  */
 public final class PreferredItemsPerUserMapper extends MapReduceBase
-    implements Mapper<VLongWritable,EntityPrefWritableArrayWritable,VLongWritable,ItemPrefWithItemVectorWeightWritable> {
+    implements Mapper<VarLongWritable,EntityPrefWritableArrayWritable,VarLongWritable,ItemPrefWithItemVectorWeightWritable> {
 
   private DistributedItemSimilarity distributedSimilarity;
 
@@ -48,9 +48,9 @@ public final class PreferredItemsPerUser
   }
 
   @Override
-  public void map(VLongWritable item,
+  public void map(VarLongWritable item,
                   EntityPrefWritableArrayWritable userPrefsArray,
-                  OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output,
+                  OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightWritable> output,
                   Reporter reporter) throws IOException {
 
     EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs();
@@ -58,7 +58,7 @@ public final class PreferredItemsPerUser
     double weight = distributedSimilarity.weightOfItemVector(new UserPrefsIterator(userPrefs));
 
     for (EntityPrefWritable userPref : userPrefs) {
-      output.collect(new VLongWritable(userPref.getID()),
+      output.collect(new VarLongWritable(userPref.getID()),
           new ItemPrefWithItemVectorWeightWritable(item.get(), weight, userPref.getPrefValue()));
     }
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -22,19 +22,19 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.math.VarLongWritable;
 
 public final class PreferredItemsPerUserReducer extends MapReduceBase
-    implements Reducer<VLongWritable,ItemPrefWithItemVectorWeightWritable,VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
+    implements Reducer<VarLongWritable,ItemPrefWithItemVectorWeightWritable,VarLongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
 
   @Override
-  public void reduce(VLongWritable user,
+  public void reduce(VarLongWritable user,
                      Iterator<ItemPrefWithItemVectorWeightWritable> itemPrefs,
-                     OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
+                     OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
                      Reporter reporter)
       throws IOException {
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -22,13 +22,13 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
+import org.apache.mahout.math.VarLongWritable;
 
 /**
  * For each single item, collect all users with their preferences
@@ -36,12 +36,12 @@ import org.apache.mahout.cf.taste.hadoop
  */
 public final class ToItemVectorReducer
     extends MapReduceBase implements
-    Reducer<VLongWritable,EntityPrefWritable,VLongWritable,EntityPrefWritableArrayWritable> {
+    Reducer<VarLongWritable,EntityPrefWritable,VarLongWritable,EntityPrefWritableArrayWritable> {
 
   @Override
-  public void reduce(VLongWritable item,
+  public void reduce(VarLongWritable item,
                      Iterator<EntityPrefWritable> userPrefs,
-                     OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output,
+                     OutputCollector<VarLongWritable,EntityPrefWritableArrayWritable> output,
                      Reporter reporter)
       throws IOException {
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -22,7 +22,6 @@ import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.JobClient;
@@ -37,6 +36,7 @@ import org.apache.mahout.cf.taste.hadoop
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
+import org.apache.mahout.math.VarLongWritable;
 
 public final class SlopeOneAverageDiffsJob extends AbstractJob {
   
@@ -54,7 +54,7 @@ public final class SlopeOneAverageDiffsJ
     String averagesOutputPath = parsedArgs.get("--tempDir");
     
     JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile, averagesOutputPath,
-      TextInputFormat.class, ToItemPrefsMapper.class, VLongWritable.class, EntityPrefWritable.class,
+      TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, EntityPrefWritable.class,
       SlopeOnePrefsToDiffsReducer.class, EntityEntityWritable.class, FloatWritable.class,
       SequenceFileOutputFormat.class);
     JobClient.runJob(prefsToDiffsJobConf);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -24,19 +24,19 @@ import java.util.Iterator;
 import java.util.List;
 
 import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
+import org.apache.mahout.math.VarLongWritable;
 
 public final class SlopeOnePrefsToDiffsReducer extends MapReduceBase implements
-    Reducer<VLongWritable,EntityPrefWritable,EntityEntityWritable,FloatWritable> {
+    Reducer<VarLongWritable,EntityPrefWritable,EntityEntityWritable,FloatWritable> {
   
   @Override
-  public void reduce(VLongWritable key,
+  public void reduce(VarLongWritable key,
                      Iterator<EntityPrefWritable> values,
                      OutputCollector<EntityEntityWritable,FloatWritable> output,
                      Reporter reporter) throws IOException {

Added: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java?rev=947844&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java Mon May 24 22:44:51 2010
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+public class VarIntWritable implements WritableComparable<VarIntWritable>, Cloneable {
+
+  private int value;
+
+  public VarIntWritable() {
+  }
+
+  public VarIntWritable(int value) {
+    this.value = value;
+  }
+
+  public int get() {
+    return value;
+  }
+
+  public void set(int value) {
+    this.value = value;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof VarIntWritable && ((VarIntWritable) other).value == value;
+  }
+
+  @Override
+  public int hashCode() {
+    return value;
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(value);
+  }
+
+  @Override
+  public VarIntWritable clone() {
+    return new VarIntWritable(value);
+  }
+
+  @Override
+  public int compareTo(VarIntWritable other) {
+    if (value < other.value) {
+      return -1;
+    } else if (value > other.value) {
+      return 1;
+    }
+    return 0;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    Varint.writeSignedVarInt(value, out);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    value = Varint.readSignedVarInt(in);
+  }
+
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java?rev=947844&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java Mon May 24 22:44:51 2010
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+public class VarLongWritable implements WritableComparable<VarLongWritable>, Cloneable {
+
+  private long value;
+
+  public VarLongWritable() {
+  }
+
+  public VarLongWritable(long value) {
+    this.value = value;
+  }
+
+  public long get() {
+    return value;
+  }
+
+  public void set(long value) {
+    this.value = value;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    return other instanceof VarLongWritable && ((VarLongWritable) other).value == value;
+  }
+
+  @Override
+  public int hashCode() {
+    return (int) ((value >>> 32) ^ value);
+  }
+
+  @Override
+  public String toString() {
+    return String.valueOf(value);
+  }
+
+  @Override
+  public VarLongWritable clone() {
+    return new VarLongWritable(value);
+  }
+
+  @Override
+  public int compareTo(VarLongWritable other) {
+    if (value < other.value) {
+      return -1;
+    } else if (value > other.value) {
+      return 1;
+    }
+    return 0;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    Varint.writeSignedVarLong(value, out);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    value = Varint.readSignedVarLong(in);
+  }
+
+}
\ No newline at end of file

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -30,11 +30,9 @@ import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.VLongWritable;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
@@ -44,6 +42,8 @@ import org.apache.mahout.cf.taste.hadoop
 import org.apache.mahout.cf.taste.hadoop.similarity.CoRating;
 import org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
 import org.easymock.IArgumentMatcher;
 import org.easymock.classextension.EasyMock;
 
@@ -55,9 +55,9 @@ import org.easymock.classextension.EasyM
 public final class ItemSimilarityTest extends MahoutTestCase {
 
   public void testUserPrefsPerItemMapper() throws Exception {
-    OutputCollector<VLongWritable,VLongWritable> output =
+    OutputCollector<VarLongWritable,VarLongWritable> output =
         EasyMock.createMock(OutputCollector.class);
-    output.collect(new VLongWritable(34L), new EntityPrefWritable(12L, 2.3f));
+    output.collect(new VarLongWritable(34L), new EntityPrefWritable(12L, 2.3f));
     EasyMock.replay(output);
 
     new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null);
@@ -66,9 +66,9 @@ public final class ItemSimilarityTest ex
   }
 
   public void testCountUsersMapper() throws Exception {
-    OutputCollector<CountUsersKeyWritable,VLongWritable> output = EasyMock.createMock(OutputCollector.class);
-    output.collect(keyForUserID(12L), EasyMock.eq(new VLongWritable(12L)));
-    output.collect(keyForUserID(35L), EasyMock.eq(new VLongWritable(35L)));
+    OutputCollector<CountUsersKeyWritable,VarLongWritable> output = EasyMock.createMock(OutputCollector.class);
+    output.collect(keyForUserID(12L), EasyMock.eq(new VarLongWritable(12L)));
+    output.collect(keyForUserID(35L), EasyMock.eq(new VarLongWritable(35L)));
     EasyMock.replay(output);
 
     CountUsersMapper mapper = new CountUsersMapper();
@@ -98,13 +98,13 @@ public final class ItemSimilarityTest ex
 
   public void testCountUsersReducer() throws Exception {
 
-    OutputCollector<IntWritable,NullWritable> output = EasyMock.createMock(OutputCollector.class);
-    output.collect(new IntWritable(3), NullWritable.get());
+    OutputCollector<VarIntWritable,NullWritable> output = EasyMock.createMock(OutputCollector.class);
+    output.collect(new VarIntWritable(3), NullWritable.get());
     EasyMock.replay(output);
 
-    List<VLongWritable> userIDs = Arrays.asList(new VLongWritable(1L), new VLongWritable(1L),
-                                                new VLongWritable(3L), new VLongWritable(5L),
-                                                new VLongWritable(5L), new VLongWritable(5L));
+    List<VarLongWritable> userIDs = Arrays.asList(new VarLongWritable(1L), new VarLongWritable(1L),
+                                                new VarLongWritable(3L), new VarLongWritable(5L),
+                                                new VarLongWritable(5L), new VarLongWritable(5L));
 
     new CountUsersReducer().reduce(null, userIDs.iterator(), output, null);
 
@@ -116,14 +116,14 @@ public final class ItemSimilarityTest ex
     List<EntityPrefWritable> userPrefs = Arrays.asList(
         new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
 
-    OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output =
+    OutputCollector<VarLongWritable,EntityPrefWritableArrayWritable> output =
         EasyMock.createMock(OutputCollector.class);
 
-    output.collect(EasyMock.eq(new VLongWritable(12L)), equalToUserPrefs(userPrefs));
+    output.collect(EasyMock.eq(new VarLongWritable(12L)), equalToUserPrefs(userPrefs));
 
     EasyMock.replay(output);
 
-    new ToItemVectorReducer().reduce(new VLongWritable(12L), userPrefs.iterator(), output, null);
+    new ToItemVectorReducer().reduce(new VarLongWritable(12L), userPrefs.iterator(), output, null);
 
     EasyMock.verify(output);
   }
@@ -162,7 +162,7 @@ public final class ItemSimilarityTest ex
   }
 
   public void testPreferredItemsPerUserMapper() throws Exception {
-    OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output =
+    OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightWritable> output =
         EasyMock.createMock(OutputCollector.class);
     EntityPrefWritableArrayWritable userPrefs = new EntityPrefWritableArrayWritable(
         new EntityPrefWritable[] {
@@ -172,8 +172,8 @@ public final class ItemSimilarityTest ex
     double weight =
       new DistributedUncenteredZeroAssumingCosineSimilarity().weightOfItemVector(Arrays.asList(2.0f, 3.0f).iterator());
 
-    output.collect(new VLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
-    output.collect(new VLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
+    output.collect(new VarLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
+    output.collect(new VarLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
 
     JobConf conf = new JobConf();
     conf.set(ItemSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME,
@@ -183,7 +183,7 @@ public final class ItemSimilarityTest ex
 
     PreferredItemsPerUserMapper mapper = new PreferredItemsPerUserMapper();
     mapper.configure(conf);
-    mapper.map(new VLongWritable(34L), userPrefs, output, null);
+    mapper.map(new VarLongWritable(34L), userPrefs, output, null);
 
     EasyMock.verify(output);
   }
@@ -194,15 +194,15 @@ public final class ItemSimilarityTest ex
         Arrays.asList(new ItemPrefWithItemVectorWeightWritable(34L, 5.0, 1.0f),
                       new ItemPrefWithItemVectorWeightWritable(56L, 7.0, 2.0f));
 
-    OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
+    OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
         EasyMock.createMock(OutputCollector.class);
 
-    output.collect(EasyMock.eq(new VLongWritable(12L)), equalToItemPrefs(itemPrefs));
+    output.collect(EasyMock.eq(new VarLongWritable(12L)), equalToItemPrefs(itemPrefs));
 
     EasyMock.replay(output);
 
     new PreferredItemsPerUserReducer().reduce(
-        new VLongWritable(12L), itemPrefs.iterator(), output, null);
+        new VarLongWritable(12L), itemPrefs.iterator(), output, null);
 
     EasyMock.verify(output);
   }
@@ -254,7 +254,7 @@ public final class ItemSimilarityTest ex
 
     EasyMock.replay(output, itemPrefs);
 
-    new CopreferredItemsMapper().map(new VLongWritable(), itemPrefs, output, null);
+    new CopreferredItemsMapper().map(new VarLongWritable(), itemPrefs, output, null);
 
     EasyMock.verify(output, itemPrefs);
   }
@@ -282,97 +282,78 @@ public final class ItemSimilarityTest ex
 
   public void testCompleteJob() throws Exception {
 
-    String tmpDirProp = System.getProperty("java.io.tmpdir");
-    if (!tmpDirProp.endsWith("/")) {
-      tmpDirProp += "/";
-    }
-    String tmpDirPath = tmpDirProp + ItemSimilarityTest.class.getCanonicalName();
-    File tmpDir = new File(tmpDirPath);
+    File inputFile = getTestTempFile("prefs.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+
+    /* user-item-matrix
+
+                 Game   Mouse   PC    Disk
+         Jane     -       1      2      -
+         Paul     1       -      1      -
+         Fred     -       -      -      1
+     */
 
+    BufferedWriter writer = new BufferedWriter(new FileWriter(inputFile));
     try {
-      if (tmpDir.exists()) {
-        recursiveDelete(tmpDir);
-      }
-      tmpDir.mkdirs();
-
-      /* user-item-matrix
-
-                   Game   Mouse   PC    Disk
-           Jane     -       1      2      -
-           Paul     1       -      1      -
-           Fred     -       -      -      1
-       */
-
-      BufferedWriter writer = new BufferedWriter(new FileWriter(tmpDirPath+"/prefs.txt"));
-      try {
-        writer.write("2,1,1\n" +
-                     "1,2,1\n" +
-                     "3,4,1\n" +
-                     "1,3,2\n" +
-                     "2,3,1\n");
-      } finally {
-        writer.close();
-      }
-
-      ItemSimilarityJob similarityJob = new ItemSimilarityJob();
-
-      Configuration conf = new Configuration();
-      conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt");
-      conf.set("mapred.output.dir", tmpDirPath+"/output");
-      conf.set("mapred.output.compress", Boolean.FALSE.toString());
+      writer.write("2,1,1\n" +
+                   "1,2,1\n" +
+                   "3,4,1\n" +
+                   "1,3,2\n" +
+                   "2,3,1\n");
+    } finally {
+      writer.close();
+    }
 
-      similarityJob.setConf(conf);
+    ItemSimilarityJob similarityJob = new ItemSimilarityJob();
 
-      similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp", "--similarityClassname",
-          "org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity"});
+    Configuration conf = new Configuration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.set("mapred.output.compress", Boolean.FALSE.toString());
 
-      int numberOfUsers = ItemSimilarityJob.readNumberOfUsers(new JobConf(), tmpDirPath + "/tmp/countUsers/part-00000");
+    similarityJob.setConf(conf);
 
-      assertEquals(3, numberOfUsers);
+    similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+        "org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity"});
 
-      String filePath = tmpDirPath+"/output/part-00000";
-      BufferedReader reader = new BufferedReader(new FileReader(filePath));
+    File countUsersPart = new File(new File(tmpDir, "countUsers"), "part-00000");
+    int numberOfUsers = ItemSimilarityJob.readNumberOfUsers(new JobConf(), countUsersPart.getAbsolutePath());
 
-      String line;
-      int currentLine = 1;
-      while ( (line = reader.readLine()) != null) {
+    assertEquals(3, numberOfUsers);
 
-        String[] tokens = line.split("\t");
+    File outPart = new File(outputDir, "part-00000");
+    BufferedReader reader = new BufferedReader(new FileReader(outPart));
 
-        long itemAID = Long.parseLong(tokens[0]);
-        long itemBID = Long.parseLong(tokens[1]);
-        double similarity = Double.parseDouble(tokens[2]);
+    String line;
+    int currentLine = 1;
+    while ( (line = reader.readLine()) != null) {
 
-        if (currentLine == 1) {
-          assertEquals(1L, itemAID);
-          assertEquals(3L, itemBID);
-          assertEquals(0.45, similarity, 0.01);
-        }
+      String[] tokens = line.split("\t");
 
-        if (currentLine == 2) {
-          assertEquals(2L, itemAID);
-          assertEquals(3L, itemBID);
-          assertEquals(0.89, similarity, 0.01);
-        }
+      long itemAID = Long.parseLong(tokens[0]);
+      long itemBID = Long.parseLong(tokens[1]);
+      double similarity = Double.parseDouble(tokens[2]);
 
-        currentLine++;
+      if (currentLine == 1) {
+        assertEquals(1L, itemAID);
+        assertEquals(3L, itemBID);
+        assertEquals(0.45, similarity, 0.01);
       }
 
-      int linesWritten = currentLine-1;
-      assertEquals(2, linesWritten);
+      if (currentLine == 2) {
+        assertEquals(2L, itemAID);
+        assertEquals(3L, itemBID);
+        assertEquals(0.89, similarity, 0.01);
+      }
 
-    } finally {
-      recursiveDelete(tmpDir);
+      currentLine++;
     }
-  }
 
-  static void recursiveDelete(File fileOrDir) {
-    if (fileOrDir.isDirectory()) {
-      for (File innerFile : fileOrDir.listFiles()) {
-        recursiveDelete(innerFile);
-      }
-    }
-    fileOrDir.delete();
+    int linesWritten = currentLine-1;
+    assertEquals(2, linesWritten);
+
   }
 
 }

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java Mon May 24 22:44:51 2010
@@ -26,7 +26,7 @@ import java.nio.charset.CharacterCodingE
 import org.apache.hadoop.io.BinaryComparable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
+import org.apache.mahout.math.Varint;
 
 /**
  * Writable for holding data generated from the collocation discovery jobs. Depending on the job configuration
@@ -168,19 +168,19 @@ public class Gram extends BinaryComparab
   
   @Override
   public void readFields(DataInput in) throws IOException {
-    int newLength = WritableUtils.readVInt(in);
+    int newLength = Varint.readUnsignedVarInt(in);
     setCapacity(newLength, false);
     in.readFully(bytes, 0, newLength);
-    int newFrequency = WritableUtils.readVInt(in);
+    int newFrequency = Varint.readUnsignedVarInt(in);
     length = newLength;
     frequency = newFrequency;
   }
   
   @Override
   public void write(DataOutput out) throws IOException {
-    WritableUtils.writeVInt(out, length);
+    Varint.writeUnsignedVarInt(length, out);
     out.write(bytes, 0, length);
-    WritableUtils.writeVInt(out, frequency);
+    Varint.writeUnsignedVarInt(frequency, out);
   }
 
   /* Cribbed from o.a.hadoop.io.Text:

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java Mon May 24 22:44:51 2010
@@ -25,7 +25,7 @@ import java.nio.charset.CharacterCodingE
 import org.apache.hadoop.io.BinaryComparable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
+import org.apache.mahout.math.Varint;
 import org.apache.mahout.utils.nlp.collocations.llr.Gram.Type;
 
 /** A GramKey, based on the identity fields of Gram (type, string) plus a byte[] used for secondary ordering */
@@ -76,8 +76,8 @@ public class GramKey extends BinaryCompa
   
   @Override
   public void readFields(DataInput in) throws IOException {
-    int newLength = WritableUtils.readVInt(in);
-    int newPrimaryLength = WritableUtils.readVInt(in);
+    int newLength = Varint.readUnsignedVarInt(in);
+    int newPrimaryLength = Varint.readUnsignedVarInt(in);
     setCapacity(newLength, false);
     in.readFully(bytes, 0, newLength);
     length = newLength;
@@ -87,8 +87,8 @@ public class GramKey extends BinaryCompa
   
   @Override
   public void write(DataOutput out) throws IOException {
-    WritableUtils.writeVInt(out, length);
-    WritableUtils.writeVInt(out, primaryLength);
+    Varint.writeUnsignedVarInt(length, out);
+    Varint.writeUnsignedVarInt(primaryLength, out);
     out.write(bytes, 0, length);
   }
   



Mime
View raw message