mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r740846 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/common/ core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/ core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ core/src/main/java...
Date Wed, 04 Feb 2009 19:56:43 GMT
Author: srowen
Date: Wed Feb  4 19:56:43 2009
New Revision: 740846

URL: http://svn.apache.org/viewvc?rev=740846&view=rev
Log: (empty)

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=740846&r1=740845&r2=740846&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Wed Feb  4 19:56:43 2009
@@ -227,9 +227,6 @@
 
   @Override
   public void putAll(Map<? extends K, ? extends V> map) {
-    if (map == null) {
-      throw new NullPointerException();
-    }
     for (Entry<? extends K, ? extends V> entry : map.entrySet()) {
       put(entry.getKey(), entry.getValue());
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=740846&r1=740845&r2=740846&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Wed Feb  4 19:56:43 2009
@@ -42,6 +42,7 @@
 import java.util.Iterator;
 import java.util.Collections;
 import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.atomic.AtomicInteger;
 
 /**
  * <p>A {@link DataModel} backed by a comma-delimited file. This class typically expects a file where each
@@ -126,12 +127,7 @@
           processFile(updateFile, data);
         }
 
-        List<User> users = new ArrayList<User>(data.size());
-        for (Map.Entry<String, List<Preference>> entries : data.entrySet()) {
-          users.add(buildUser(entries.getKey(), entries.getValue()));
-        }
-
-        delegate = new GenericDataModel(users);
+        delegate = new GenericDataModel(new UserIteratableOverData(data));
         loaded = true;
 
       } finally {
@@ -165,6 +161,7 @@
   protected void processFile(File dataOrUpdateFile, Map<String, List<Preference>> data) {
     log.info("Reading file info...");
     Map<String, Item> itemCache = new FastMap<String, Item>(1001);
+    AtomicInteger count = new AtomicInteger();
     for (String line : new FileLineIterable(dataOrUpdateFile, false)) {
       if (line.length() > 0) {
         log.debug("Read line: {}", line);
@@ -172,6 +169,10 @@
           delimiter = determineDelimiter(line);
         }
         processLine(line, data, itemCache);
+        int currentCount = count.incrementAndGet();
+        if (currentCount % 100000 == 0) {
+          log.info("Processed {} lines", currentCount);
+        }
       }
     }
   }
@@ -387,4 +388,36 @@
     return "FileDataModel[dataFile:" + dataFile + ']';
   }
 
+
+  private final class UserIteratableOverData implements Iterable<User> {
+    private final Map<String, List<Preference>> data;
+    private UserIteratableOverData(Map<String, List<Preference>> data) {
+      this.data = data;
+    }
+    @Override
+    public Iterator<User> iterator() {
+      return new UserIteratorOverData(data.entrySet().iterator());
+    }
+  }
+
+  private final class UserIteratorOverData implements Iterator<User> {
+    private final Iterator<Map.Entry<String, List<Preference>>> dataIterator;
+    private UserIteratorOverData(Iterator<Map.Entry<String, List<Preference>>> dataIterator) {
+      this.dataIterator = dataIterator;
+    }
+    @Override
+    public boolean hasNext() {
+      return dataIterator.hasNext();
+    }
+    @Override
+    public User next() {
+      Map.Entry<String, List<Preference>> datum = dataIterator.next();
+      return buildUser(datum.getKey(), datum.getValue());
+    }
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java?rev=740846&r1=740845&r2=740846&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java Wed Feb  4 19:56:43 2009
@@ -41,7 +41,7 @@
       throw new IllegalArgumentException("neighborhood is null");
     }
     this.neighborhood = neighborhood;
-    int maxCacheSize = (int) Math.sqrt(dataModel.getNumUsers()); // just a dumb heuristic for sizing
+    int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing
     this.neighborhoodCache = new Cache<Object, Collection<User>>(new NeighborhoodRetriever(neighborhood), maxCacheSize);
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=740846&r1=740845&r2=740846&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Wed Feb  4 19:56:43 2009
@@ -253,10 +253,6 @@
     if (!Double.isNaN(result)) {
       result = normalizeWeightResult(result, count, cachedNumItems);
     }
-
-    if (log.isTraceEnabled()) {
-      log.trace("UserSimilarity between " + user1 + " and " + user2 + " is " + result);
-    }
     return result;
   }
 
@@ -342,10 +338,6 @@
     if (!Double.isNaN(result)) {
       result = normalizeWeightResult(result, count, cachedNumUsers);
     }
-
-    if (log.isTraceEnabled()) {
-      log.trace("ItemSimilarity between " + item1 + " and " + item2 + " is " + result);
-    }
     return result;
   }
 

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java?rev=740846&r1=740845&r2=740846&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java Wed Feb  4 19:56:43 2009
@@ -24,14 +24,15 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
-import java.io.Writer;
 import java.io.OutputStreamWriter;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.PrintWriter;
 import java.util.Map;
 import java.util.Iterator;
 import java.util.List;
 import java.util.ArrayList;
+import java.nio.charset.Charset;
 
 public final class TransposeToByUser {
 
@@ -100,11 +101,11 @@
   }
 
   private static void appendStringsToFile(List<String> strings, File file) throws IOException {
-    Writer outputStreamWriter = new OutputStreamWriter(new FileOutputStream(file, true));
+    PrintWriter outputStreamWriter =
+        new PrintWriter(new OutputStreamWriter(new FileOutputStream(file, true), Charset.forName("UTF-8")));
     try {
       for (String s : strings) {
-        outputStreamWriter.write(s);
-        outputStreamWriter.write('\n');
+        outputStreamWriter.println(s);
       }
     } finally {
       IOUtils.quietClose(outputStreamWriter);



Mime
View raw message