mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1458765 - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/ core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute...
Date Wed, 20 Mar 2013 11:15:10 GMT
Author: ssc
Date: Wed Mar 20 11:15:09 2013
New Revision: 1458765

URL: http://svn.apache.org/r1458765
Log:
MAHOUT-1167 Parallel item similarity precomputation on a single machine

Added:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java
      - copied, changed from r1457659, mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/
    mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java
Removed:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1458765&r1=1458764&r2=1458765&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
Wed Mar 20 11:15:09 2013
@@ -39,6 +39,7 @@ import org.apache.mahout.cf.taste.common
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
 import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.math.Vector;

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java?rev=1458765&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
(added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java
Wed Mar 20 11:15:09 2013
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.precompute;
+
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+
+/**
+ * Persist the precomputed item similarities to a file that can later be used
+ * by a {@link org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity}.
+ * Every similarity is written as one line of the form {@code itemID,similarItemID,similarity}.
+ */
+public class FileSimilarItemsWriter implements SimilarItemsWriter {
+
+  private final File file;
+  private BufferedWriter writer;
+
+  /**
+   * @param file file the similarities are written to; it is only opened once {@link #open()} is called
+   */
+  public FileSimilarItemsWriter(File file) {
+    this.file = file;
+  }
+
+  /**
+   * Opens the target file for buffered writing.
+   *
+   * @throws IOException if the file cannot be opened for writing
+   */
+  @Override
+  public void open() throws IOException {
+    writer = new BufferedWriter(new FileWriter(file));
+  }
+
+  /**
+   * Writes one line per similar item of the given item. {@link #open()} must have been called before.
+   *
+   * @param similarItems the item together with its similar items
+   * @throws IOException if writing fails
+   */
+  @Override
+  public void add(SimilarItems similarItems) throws IOException {
+    String itemID = String.valueOf(similarItems.getItemID());
+    for (SimilarItem similarItem : similarItems.getSimilarItems()) {
+      writer.write(itemID);
+      writer.write(',');
+      writer.write(String.valueOf(similarItem.getItemID()));
+      writer.write(',');
+      writer.write(String.valueOf(similarItem.getSimilarity()));
+      writer.newLine();
+    }
+  }
+
+  /**
+   * Flushes all buffered similarities and closes the file. Does nothing if the writer was never opened.
+   *
+   * @throws IOException if flushing or closing fails, in which case the file may be incomplete
+   */
+  @Override
+  public void close() throws IOException {
+    // Closing flushes the buffer. A failure here means similarities were not persisted, so it
+    // must reach the caller instead of being swallowed.
+    if (writer != null) {
+      writer.close();
+    }
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java?rev=1458765&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
(added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java
Wed Mar 20 11:15:09 2013
@@ -0,0 +1,224 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity.precompute;
+
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems;
+import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Precompute item similarities in parallel on a single machine. The recommender given to this class must use a
+ * DataModel that holds the interactions in memory (such as
+ * {@link org.apache.mahout.cf.taste.impl.model.GenericDataModel} or
+ * {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}) as fast random access to the data is required
+ */
+public class MultithreadedBatchItemSimilarities extends BatchItemSimilarities {
+
+  private final int batchSize;
+
+  private static final int DEFAULT_BATCH_SIZE = 100;
+
+  private static final Logger log = LoggerFactory.getLogger(MultithreadedBatchItemSimilarities.class);
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   */
+  public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) {
+    this(recommender, similarItemsPerItem, DEFAULT_BATCH_SIZE);
+  }
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   * @param batchSize size of item batches sent to worker threads, must be positive
+   * @throws IllegalArgumentException if batchSize is not positive
+   */
+  public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem,
+      int batchSize) {
+    super(recommender, similarItemsPerItem);
+    if (batchSize < 1) {
+      throw new IllegalArgumentException("batchSize must be positive but was " + batchSize);
+    }
+    this.batchSize = batchSize;
+  }
+
+  /**
+   * Splits all item IDs into batches, lets {@code degreeOfParallelism} worker threads compute the most similar
+   * items per batch and hands the results to a single output thread that feeds the writer. The writer is opened
+   * at the beginning and closed before this method returns or throws.
+   *
+   * @param degreeOfParallelism number of worker threads to use for the computation
+   * @param maxDurationInHours maximum duration of the computation
+   * @param writer {@link SimilarItemsWriter} used to persist the results
+   * @return the number of similarities precomputed
+   * @throws IOException if setting up the computation, computing similarities, or writing/closing the output fails
+   * @throws RuntimeException if the computation takes longer than maxDurationInHours or the calling thread
+   *         is interrupted while waiting
+   */
+  @Override
+  public int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours, SimilarItemsWriter writer)
+      throws IOException {
+
+    // one thread per worker plus one for the Output task that drains the results
+    ExecutorService executorService = Executors.newFixedThreadPool(degreeOfParallelism + 1);
+
+    // first failure seen in any worker or in the output thread; shared so that the other tasks can stop early
+    java.util.concurrent.atomic.AtomicReference<Throwable> failure =
+        new java.util.concurrent.atomic.AtomicReference<Throwable>();
+
+    Output output = null;
+    boolean succeeded = false;
+    try {
+      try {
+        writer.open();
+
+        DataModel dataModel = getRecommender().getDataModel();
+
+        BlockingQueue<long[]> itemsIDsInBatches = queueItemIDsInBatches(dataModel, batchSize);
+        BlockingQueue<List<SimilarItems>> results = new LinkedBlockingQueue<List<SimilarItems>>();
+
+        AtomicInteger numActiveWorkers = new AtomicInteger(degreeOfParallelism);
+        for (int n = 0; n < degreeOfParallelism; n++) {
+          executorService.execute(new SimilarItemsWorker(n, itemsIDsInBatches, results, numActiveWorkers, failure));
+        }
+
+        output = new Output(results, writer, numActiveWorkers, failure);
+        executorService.execute(output);
+
+      } catch (Exception e) {
+        throw new IOException(e);
+      } finally {
+        awaitCompletion(executorService, maxDurationInHours);
+      }
+
+      // the tasks run in pool threads, so their failures have to be rethrown here explicitly
+      Throwable cause = failure.get();
+      if (cause != null) {
+        throw new IOException("Computation of item similarities failed", cause);
+      }
+      succeeded = true;
+    } finally {
+      // On success a failing close() means lost output and must be reported. If we are already
+      // failing, close quietly so that the original exception is not masked.
+      Closeables.close(writer, !succeeded);
+    }
+
+    return output.getNumSimilaritiesProcessed();
+  }
+
+  /**
+   * Stops accepting new tasks and waits for the submitted ones. On timeout or interruption the still running
+   * tasks are interrupted so that they do not keep running in the background.
+   */
+  private static void awaitCompletion(ExecutorService executorService, int maxDurationInHours) {
+    executorService.shutdown();
+    try {
+      boolean completed = executorService.awaitTermination(maxDurationInHours, TimeUnit.HOURS);
+      if (!completed) {
+        executorService.shutdownNow();
+        throw new RuntimeException("Unable to complete the computation in " + maxDurationInHours + " hours!");
+      }
+    } catch (InterruptedException e) {
+      executorService.shutdownNow();
+      Thread.currentThread().interrupt();
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Reads all item IDs of the data model and groups them into arrays of at most {@code batchSize} IDs.
+   *
+   * @return queue holding every item ID exactly once, in batches
+   */
+  private BlockingQueue<long[]> queueItemIDsInBatches(DataModel dataModel, int batchSize) throws TasteException {
+
+    LongPrimitiveIterator itemIDs = dataModel.getItemIDs();
+    int numItems = dataModel.getNumItems();
+
+    BlockingQueue<long[]> itemIDBatches = new LinkedBlockingQueue<long[]>((numItems / batchSize) + 1);
+
+    long[] batch = new long[batchSize];
+    int pos = 0;
+    while (itemIDs.hasNext()) {
+      if (pos == batchSize) {
+        itemIDBatches.add(batch.clone());
+        pos = 0;
+      }
+      batch[pos] = itemIDs.nextLong();
+      pos++;
+    }
+    // 'pos' is the number of IDs collected since the last flush; they still have to be queued,
+    // including the case of a completely filled final batch
+    if (pos > 0) {
+      long[] lastBatch = new long[pos];
+      System.arraycopy(batch, 0, lastBatch, 0, pos);
+      itemIDBatches.add(lastBatch);
+    }
+
+    log.info("Queued {} items in {} batches", numItems, itemIDBatches.size());
+
+    return itemIDBatches;
+  }
+
+
+  /**
+   * Single consumer that takes the computed batches from the result queue and passes them to the writer.
+   */
+  private class Output implements Runnable {
+
+    private final BlockingQueue<List<SimilarItems>> results;
+    private final SimilarItemsWriter writer;
+    private final AtomicInteger numActiveWorkers;
+    private final java.util.concurrent.atomic.AtomicReference<Throwable> failure;
+    private int numSimilaritiesProcessed = 0;
+
+    Output(BlockingQueue<List<SimilarItems>> results, SimilarItemsWriter writer, AtomicInteger numActiveWorkers,
+        java.util.concurrent.atomic.AtomicReference<Throwable> failure) {
+      this.results = results;
+      this.writer = writer;
+      this.numActiveWorkers = numActiveWorkers;
+      this.failure = failure;
+    }
+
+    private int getNumSimilaritiesProcessed() {
+      return numSimilaritiesProcessed;
+    }
+
+    @Override
+    public void run() {
+      try {
+        // Workers enqueue their last batch before they decrement the counter, so once the counter
+        // is zero the queue holds everything that is left. Keep going until it is drained as well.
+        while (numActiveWorkers.get() != 0 || !results.isEmpty()) {
+          List<SimilarItems> similarItemsOfABatch = results.poll(10, TimeUnit.MILLISECONDS);
+          if (similarItemsOfABatch != null) {
+            for (SimilarItems similarItems : similarItemsOfABatch) {
+              writer.add(similarItems);
+              numSimilaritiesProcessed += similarItems.numSimilarItems();
+            }
+          }
+        }
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        failure.compareAndSet(null, e);
+      } catch (Throwable t) {
+        // thread boundary: record the problem so that the caller can report it
+        failure.compareAndSet(null, t);
+      }
+    }
+  }
+
+  /**
+   * Takes batches of item IDs from the shared queue and computes the most similar items for each ID.
+   */
+  private class SimilarItemsWorker implements Runnable {
+
+    private final int number;
+    private final BlockingQueue<long[]> itemIDBatches;
+    private final BlockingQueue<List<SimilarItems>> results;
+    private final AtomicInteger numActiveWorkers;
+    private final java.util.concurrent.atomic.AtomicReference<Throwable> failure;
+
+    SimilarItemsWorker(int number, BlockingQueue<long[]> itemIDBatches, BlockingQueue<List<SimilarItems>> results,
+        AtomicInteger numActiveWorkers, java.util.concurrent.atomic.AtomicReference<Throwable> failure) {
+      this.number = number;
+      this.itemIDBatches = itemIDBatches;
+      this.results = results;
+      this.numActiveWorkers = numActiveWorkers;
+      this.failure = failure;
+    }
+
+    @Override
+    public void run() {
+
+      int numBatchesProcessed = 0;
+      try {
+        long[] itemIDBatch;
+        // poll() never blocks: checking isEmpty() and then calling take() could hang forever when
+        // another worker grabs the last batch in between
+        while (failure.get() == null && !Thread.currentThread().isInterrupted()
+            && (itemIDBatch = itemIDBatches.poll()) != null) {
+
+          List<SimilarItems> similarItemsOfBatch = Lists.newArrayListWithCapacity(itemIDBatch.length);
+          for (long itemID : itemIDBatch) {
+            List<RecommendedItem> similarItems = getRecommender().mostSimilarItems(itemID, getSimilarItemsPerItem());
+
+            similarItemsOfBatch.add(new SimilarItems(itemID, similarItems));
+          }
+
+          results.offer(similarItemsOfBatch);
+
+          if (++numBatchesProcessed % 5 == 0) {
+            log.info("worker {} processed {} batches", number, numBatchesProcessed);
+          }
+        }
+        log.info("worker {} processed {} batches. done.", number, numBatchesProcessed);
+      } catch (Throwable t) {
+        // thread boundary: record the problem so that the caller can report it
+        failure.compareAndSet(null, t);
+        log.warn("worker " + number + " failed", t);
+      } finally {
+        // always sign off, otherwise the output thread would wait for this worker until the timeout
+        numActiveWorkers.decrementAndGet();
+      }
+    }
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java?rev=1458765&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java
(added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java
Wed Mar 20 11:15:09 2013
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.similarity.precompute;
+
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+
+import java.io.IOException;
+
+/**
+ * Base class for batch computations of item similarities. It holds the recommender used to find
+ * similar items and the number of similar items to compute per item.
+ */
+public abstract class BatchItemSimilarities {
+
+  // final: both values are handed out to worker threads by implementations and never change
+  private final ItemBasedRecommender recommender;
+  private final int similarItemsPerItem;
+
+  /**
+   * @param recommender recommender to use
+   * @param similarItemsPerItem number of similar items to compute per item
+   */
+  protected BatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) {
+    this.recommender = recommender;
+    this.similarItemsPerItem = similarItemsPerItem;
+  }
+
+  /**
+   * @return the recommender given at construction time
+   */
+  protected ItemBasedRecommender getRecommender() {
+    return recommender;
+  }
+
+  /**
+   * @return the number of similar items to compute per item
+   */
+  protected int getSimilarItemsPerItem() {
+    return similarItemsPerItem;
+  }
+
+  /**
+   * @param degreeOfParallelism number of threads to use for the computation
+   * @param maxDurationInHours  maximum duration of the computation
+   * @param writer  {@link SimilarItemsWriter} used to persist the results
+   * @return  the number of similarities precomputed
+   * @throws IOException
+   * @throws RuntimeException if the computation takes longer than maxDurationInHours
+   */
+  public abstract int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours,
+      SimilarItemsWriter writer) throws IOException;
+}

Copied: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java
(from r1457659, mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java?p2=mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java&p1=mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java&r1=1457659&r2=1458765&rev=1458765&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/SimilarItem.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java
Wed Mar 20 11:15:09 2013
@@ -15,19 +15,28 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
+package org.apache.mahout.cf.taste.similarity.precompute;
+
+import com.google.common.primitives.Doubles;
 
-import java.io.Serializable;
 import java.util.Comparator;
 
-class SimilarItem {
+/**
+ * Modeling similarity towards another item
+ */
+public class SimilarItem {
 
-  static final Comparator<SimilarItem> COMPARE_BY_SIMILARITY = new BySimilaritySimilarItemComparator();
+  public static final Comparator<SimilarItem> COMPARE_BY_SIMILARITY = new Comparator<SimilarItem>()
{
+    @Override
+    public int compare(SimilarItem s1, SimilarItem s2) {
+      return Doubles.compare(s1.similarity, s2.similarity);
+    }
+  };
 
   private final long itemID;
   private final double similarity;
 
-  SimilarItem(long itemID, double similarity) {
+  public SimilarItem(long itemID, double similarity) {
     this.itemID = itemID;
     this.similarity = similarity;
   }
@@ -40,10 +49,4 @@ class SimilarItem {
     return similarity;
   }
 
-  static class BySimilaritySimilarItemComparator implements Comparator<SimilarItem>,
Serializable {
-    @Override
-    public int compare(SimilarItem s1, SimilarItem s2) {
-      return s1.similarity == s2.similarity ? 0 : s1.similarity < s2.similarity ? -1 :
1;
-    }
-  }
 }

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java?rev=1458765&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java
(added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java
Wed Mar 20 11:15:09 2013
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.similarity.precompute;
+
+import com.google.common.collect.UnmodifiableIterator;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Compact representation of all similar items for an item. The IDs and similarity values are
+ * copied into two parallel arrays in the order of the list given to the constructor.
+ */
+public class SimilarItems {
+
+  private final long itemID;
+  private final long[] similarItemIDs;
+  private final double[] similarities;
+
+  /**
+   * @param itemID the item the similar items belong to
+   * @param similarItems the similar items; the ID and value of each entry are copied
+   */
+  public SimilarItems(long itemID, List<RecommendedItem> similarItems) {
+    this.itemID = itemID;
+
+    int numSimilarItems = similarItems.size();
+    similarItemIDs = new long[numSimilarItems];
+    similarities = new double[numSimilarItems];
+
+    for (int n = 0; n < numSimilarItems; n++) {
+      RecommendedItem similarItem = similarItems.get(n);
+      similarItemIDs[n] = similarItem.getItemID();
+      similarities[n] = similarItem.getValue();
+    }
+  }
+
+  public long getItemID() {
+    return itemID;
+  }
+
+  /**
+   * @return the number of similar items held for this item
+   */
+  public int numSimilarItems() {
+    return similarItemIDs.length;
+  }
+
+  /**
+   * @return a read-only view on all similar items; every call to {@code iterator()} starts at the first one
+   */
+  public Iterable<SimilarItem> getSimilarItems() {
+    return new Iterable<SimilarItem>() {
+      @Override
+      public Iterator<SimilarItem> iterator() {
+        return new SimilarItemsIterator();
+      }
+    };
+  }
+
+  private class SimilarItemsIterator extends UnmodifiableIterator<SimilarItem> {
+
+    // position of the element returned by the next call to next(); starts at the first element
+    private int index = 0;
+
+    @Override
+    public boolean hasNext() {
+      return index < similarItemIDs.length;
+    }
+
+    @Override
+    public SimilarItem next() {
+      if (!hasNext()) {
+        throw new java.util.NoSuchElementException();
+      }
+      // read the current position first and advance afterwards, so that element 0 is not skipped
+      SimilarItem similarItem = new SimilarItem(similarItemIDs[index], similarities[index]);
+      index++;
+      return similarItem;
+    }
+  }
+}
\ No newline at end of file

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java?rev=1458765&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java
(added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java
Wed Mar 20 11:15:09 2013
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.similarity.precompute;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * Used to persist the results of a batch item similarity computation
+ * conducted with a {@link BatchItemSimilarities} implementation
+ */
+public interface SimilarItemsWriter extends Closeable {
+
+  /**
+   * Prepares the writer for receiving similar items via {@link #add(SimilarItems)}.
+   *
+   * @throws IOException if the underlying output cannot be opened
+   */
+  void open() throws IOException;
+
+  /**
+   * Persists the similar items of a single item.
+   *
+   * @param similarItems an item together with its similar items
+   * @throws IOException if the similar items cannot be written
+   */
+  void add(SimilarItems similarItems) throws IOException;
+
+}

Added: mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java?rev=1458765&view=auto
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java
(added)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/example/BatchItemSimilaritiesGroupLens.java
Wed Mar 20 11:15:09 2013
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.similarity.precompute.example;
+
+import org.apache.mahout.cf.taste.example.grouplens.GroupLensDataModel;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.precompute.FileSimilarItemsWriter;
+import org.apache.mahout.cf.taste.impl.similarity.precompute.MultithreadedBatchItemSimilarities;
+import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
+
+import java.io.File;
+
+/**
+ * Example that precomputes all item similarities of the Movielens1M dataset
+ *
+ * Usage: download movielens1M from http://www.grouplens.org/node/73 , unzip it and invoke this code with the path
+ * to the ratings.dat file as argument
+ *
+ */
+public class BatchItemSimilaritiesGroupLens {
+
+  public static void main(String[] args) throws Exception {
+
+    if (args.length != 1) {
+      System.err.println("Need path to ratings.dat of the movielens1M dataset as argument!");
+      System.exit(-1);
+    }
+
+    // the similarities go to a fixed file in the temp directory; drop the output of a previous run
+    String tmpDir = System.getProperty("java.io.tmpdir");
+    File similaritiesFile = new File(tmpDir, "similarities.csv");
+    if (similaritiesFile.exists()) {
+      similaritiesFile.delete();
+    }
+
+    // item-based recommender with log-likelihood similarity on top of the ratings file
+    File ratingsFile = new File(args[0]);
+    FileDataModel model = new GroupLensDataModel(ratingsFile);
+    LogLikelihoodSimilarity similarity = new LogLikelihoodSimilarity(model);
+    GenericItemBasedRecommender itemBasedRecommender = new GenericItemBasedRecommender(model, similarity);
+    BatchItemSimilarities batchSimilarities = new MultithreadedBatchItemSimilarities(itemBasedRecommender, 5);
+
+    // one worker per available processor, at most one hour of computation
+    int numThreads = Runtime.getRuntime().availableProcessors();
+    FileSimilarItemsWriter similarItemsWriter = new FileSimilarItemsWriter(similaritiesFile);
+    int numSimilarities = batchSimilarities.computeItemSimilarities(numThreads, 1, similarItemsWriter);
+
+    int numItems = model.getNumItems();
+    String resultPath = similaritiesFile.getAbsolutePath();
+    System.out.println("Computed " + numSimilarities + " similarities for " + numItems + " items " +
+        "and saved them to " + resultPath);
+  }
+
+}



Mime
View raw message