Return-Path: Delivered-To: apmail-lucene-mahout-commits-archive@minotaur.apache.org Received: (qmail 6223 invoked from network); 4 Feb 2009 19:57:12 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 4 Feb 2009 19:57:12 -0000 Received: (qmail 18873 invoked by uid 500); 4 Feb 2009 19:57:12 -0000 Delivered-To: apmail-lucene-mahout-commits-archive@lucene.apache.org Received: (qmail 18808 invoked by uid 500); 4 Feb 2009 19:57:12 -0000 Mailing-List: contact mahout-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mahout-dev@lucene.apache.org Delivered-To: mailing list mahout-commits@lucene.apache.org Received: (qmail 18799 invoked by uid 99); 4 Feb 2009 19:57:11 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 04 Feb 2009 11:57:11 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 04 Feb 2009 19:57:04 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 60D0F23888E6; Wed, 4 Feb 2009 19:56:44 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r740846 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/common/ core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/ core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ core/src/main/java... Date: Wed, 04 Feb 2009 19:56:43 -0000 To: mahout-commits@lucene.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090204195644.60D0F23888E6@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: srowen Date: Wed Feb 4 19:56:43 2009 New Revision: 740846 URL: http://svn.apache.org/viewvc?rev=740846&view=rev Log: (empty) Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=740846&r1=740845&r2=740846&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Wed Feb 4 19:56:43 2009 @@ -227,9 +227,6 @@ @Override public void putAll(Map map) { - if (map == null) { - throw new NullPointerException(); - } for (Entry entry : map.entrySet()) { put(entry.getKey(), entry.getValue()); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=740846&r1=740845&r2=740846&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Wed Feb 4 19:56:43 2009 @@ -42,6 +42,7 @@ import java.util.Iterator; import java.util.Collections; import java.util.concurrent.locks.ReentrantLock; +import java.util.concurrent.atomic.AtomicInteger; /** *

A {@link DataModel} backed by a comma-delimited file. This class typically expects a file where each @@ -126,12 +127,7 @@ processFile(updateFile, data); } - List users = new ArrayList(data.size()); - for (Map.Entry> entries : data.entrySet()) { - users.add(buildUser(entries.getKey(), entries.getValue())); - } - - delegate = new GenericDataModel(users); + delegate = new GenericDataModel(new UserIteratableOverData(data)); loaded = true; } finally { @@ -165,6 +161,7 @@ protected void processFile(File dataOrUpdateFile, Map> data) { log.info("Reading file info..."); Map itemCache = new FastMap(1001); + AtomicInteger count = new AtomicInteger(); for (String line : new FileLineIterable(dataOrUpdateFile, false)) { if (line.length() > 0) { log.debug("Read line: {}", line); @@ -172,6 +169,10 @@ delimiter = determineDelimiter(line); } processLine(line, data, itemCache); + int currentCount = count.incrementAndGet(); + if (currentCount % 100000 == 0) { + log.info("Processed {} lines", currentCount); + } } } } @@ -387,4 +388,36 @@ return "FileDataModel[dataFile:" + dataFile + ']'; } + + private final class UserIteratableOverData implements Iterable { + private final Map> data; + private UserIteratableOverData(Map> data) { + this.data = data; + } + @Override + public Iterator iterator() { + return new UserIteratorOverData(data.entrySet().iterator()); + } + } + + private final class UserIteratorOverData implements Iterator { + private final Iterator>> dataIterator; + private UserIteratorOverData(Iterator>> dataIterator) { + this.dataIterator = dataIterator; + } + @Override + public boolean hasNext() { + return dataIterator.hasNext(); + } + @Override + public User next() { + Map.Entry> datum = dataIterator.next(); + return buildUser(datum.getKey(), datum.getValue()); + } + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java?rev=740846&r1=740845&r2=740846&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java Wed Feb 4 19:56:43 2009 @@ -41,7 +41,7 @@ throw new IllegalArgumentException("neighborhood is null"); } this.neighborhood = neighborhood; - int maxCacheSize = (int) Math.sqrt(dataModel.getNumUsers()); // just a dumb heuristic for sizing + int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing this.neighborhoodCache = new Cache>(new NeighborhoodRetriever(neighborhood), maxCacheSize); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=740846&r1=740845&r2=740846&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Wed Feb 4 19:56:43 2009 @@ -253,10 +253,6 @@ if (!Double.isNaN(result)) { result = normalizeWeightResult(result, count, cachedNumItems); } - - if (log.isTraceEnabled()) { - log.trace("UserSimilarity between " + user1 + " and " + user2 + " is " + result); - } return result; } @@ -342,10 +338,6 @@ if (!Double.isNaN(result)) { result = normalizeWeightResult(result, count, cachedNumUsers); } - - if (log.isTraceEnabled()) { - log.trace("ItemSimilarity between " + item1 + " and " + item2 + " is " + result); - } return result; } Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java?rev=740846&r1=740845&r2=740846&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/netflix/TransposeToByUser.java Wed Feb 4 19:56:43 2009 @@ -24,14 +24,15 @@ import org.slf4j.LoggerFactory; import java.io.File; -import java.io.Writer; import java.io.OutputStreamWriter; import java.io.FileOutputStream; import java.io.IOException; +import java.io.PrintWriter; import java.util.Map; import java.util.Iterator; import java.util.List; import java.util.ArrayList; +import java.nio.charset.Charset; public final class TransposeToByUser { @@ -100,11 +101,11 @@ } private static void appendStringsToFile(List strings, File file) throws IOException { - Writer outputStreamWriter = new OutputStreamWriter(new FileOutputStream(file, true)); + PrintWriter outputStreamWriter = + new PrintWriter(new OutputStreamWriter(new FileOutputStream(file, true), Charset.forName("UTF-8"))); try { for (String s : strings) { - outputStreamWriter.write(s); - outputStreamWriter.write('\n'); + outputStreamWriter.println(s); } } finally { IOUtils.quietClose(outputStreamWriter);