Return-Path: Delivered-To: apmail-lucene-mahout-commits-archive@locus.apache.org Received: (qmail 30881 invoked from network); 9 Nov 2008 17:11:52 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 9 Nov 2008 17:11:51 -0000 Received: (qmail 62903 invoked by uid 500); 9 Nov 2008 17:11:59 -0000 Delivered-To: apmail-lucene-mahout-commits-archive@lucene.apache.org Received: (qmail 62871 invoked by uid 500); 9 Nov 2008 17:11:58 -0000 Mailing-List: contact mahout-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mahout-dev@lucene.apache.org Delivered-To: mailing list mahout-commits@lucene.apache.org Received: (qmail 62862 invoked by uid 99); 9 Nov 2008 17:11:58 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 09 Nov 2008 09:11:58 -0800 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 09 Nov 2008 17:10:48 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id D04B82388961; Sun, 9 Nov 2008 09:11:00 -0800 (PST) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r712520 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java classifier/BayesFileFormatter.java classifier/bayes/common/BayesFeatureMapper.java common/Model.java Date: Sun, 09 Nov 2008 17:11:00 -0000 To: mahout-commits@lucene.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20081109171100.D04B82388961@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: srowen Date: Sun Nov 9 09:11:00 2008 New Revision: 712520 URL: http://svn.apache.org/viewvc?rev=712520&view=rev Log: Tiny performance improvements / streamlining Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java?rev=712520&r1=712519&r2=712520&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java Sun Nov 9 09:11:00 2008 @@ -27,7 +27,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -43,7 +42,7 @@ while (values.hasNext()) { prefs.add(new ItemPrefWritable(values.next())); } - Collections.sort(prefs, (Comparator) ByItemIDComparator.getInstance()); + Collections.sort(prefs, ByItemIDComparator.getInstance()); int size = prefs.size(); for (int i = 0; i < size; i++) { ItemPrefWritable first = prefs.get(i); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=712520&r1=712519&r2=712520&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sun Nov 9 09:11:00 2008 @@ -204,8 +204,8 @@ writer.write(termBuffer, 0, termLen); writer.write(' '); - char[] tmp = new char[termLen]; - System.arraycopy(termBuffer, 0, tmp, 0, termLen); + //char[] tmp = new char[termLen]; + //System.arraycopy(termBuffer, 0, tmp, 0, termLen); //seen.add(tmp);// do this b/c CharArraySet doesn't allow offsets } ///numTokens++; Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java?rev=712520&r1=712519&r2=712520&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java Sun Nov 9 09:11:00 2008 @@ -70,31 +70,32 @@ String label = key.toString(); int keyLen = label.length(); - Map wordList = new HashMap(1000); + Map wordList = new HashMap(1000); StringBuilder builder = new StringBuilder(label); builder.ensureCapacity(32);// make sure we have a reasonably size buffer to // begin with List ngrams = Model.generateNGramsWithoutLabel(value.toString(), gramSize); for (String ngram : ngrams) { - Integer[] count = wordList.get(ngram); + int[] count = wordList.get(ngram); if (count == null) { - count = new Integer[1]; + count = new int[1]; count[0] = 0; wordList.put(ngram, count); } count[0]++; } double lengthNormalisation = 0.0; - for (Integer[] D_kj : wordList.values()) { + for (int[] D_kj : wordList.values()) { // key is label,word - lengthNormalisation += D_kj[0].doubleValue() * D_kj[0].doubleValue(); + double dkjValue = (double) D_kj[0]; + lengthNormalisation += dkjValue * dkjValue; } lengthNormalisation = Math.sqrt(lengthNormalisation); // Output Length Normalized + TF Transformed Frequency per Word per Class // Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) ) - for (Map.Entry entry : wordList.entrySet()) { + for (Map.Entry entry : wordList.entrySet()) { // key is label,word String token = entry.getKey(); builder.append(',').append(token); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java?rev=712520&r1=712519&r2=712520&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java Sun Nov 9 09:11:00 2008 @@ -83,15 +83,16 @@ if (!labelList.containsKey(label)) { Integer labelId = labelList.size(); labelList.put(label, labelId); + return labelId; } return labelList.get(label); } protected Integer getFeature(String feature) { if (!featureList.containsKey(feature)) { - Integer featureId = featureList.size(); featureList.put(feature, featureId); + return featureId; } return featureList.get(feature); }