mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r712520 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout: cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java classifier/BayesFileFormatter.java classifier/bayes/common/BayesFeatureMapper.java common/Model.java
Date Sun, 09 Nov 2008 17:11:00 GMT
Author: srowen
Date: Sun Nov  9 09:11:00 2008
New Revision: 712520

URL: http://svn.apache.org/viewvc?rev=712520&view=rev
Log:
Tiny performance improvements / streamlining

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java?rev=712520&r1=712519&r2=712520&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/SlopeOnePrefsToDiffsReducer.java Sun Nov  9 09:11:00 2008
@@ -27,7 +27,6 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.Iterator;
 import java.util.List;
 
@@ -43,7 +42,7 @@
     while (values.hasNext()) {
       prefs.add(new ItemPrefWritable(values.next()));
     }
-    Collections.sort(prefs, (Comparator<? super ItemPrefWritable>) ByItemIDComparator.getInstance());
+    Collections.sort(prefs, ByItemIDComparator.getInstance());
     int size = prefs.size();
     for (int i = 0; i < size; i++) {
       ItemPrefWritable first = prefs.get(i);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=712520&r1=712519&r2=712520&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Sun Nov  9 09:11:00 2008
@@ -204,8 +204,8 @@
        
       writer.write(termBuffer, 0, termLen);
       writer.write(' ');
-      char[] tmp = new char[termLen];
-      System.arraycopy(termBuffer, 0, tmp, 0, termLen);
+      //char[] tmp = new char[termLen];
+      //System.arraycopy(termBuffer, 0, tmp, 0, termLen);
       //seen.add(tmp);// do this b/c CharArraySet doesn't allow offsets
     }
     ///numTokens++;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java?rev=712520&r1=712519&r2=712520&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesFeatureMapper.java Sun Nov  9 09:11:00 2008
@@ -70,31 +70,32 @@
     String label = key.toString();
     int keyLen = label.length();
 
-    Map<String, Integer[]> wordList = new HashMap<String, Integer[]>(1000);
+    Map<String, int[]> wordList = new HashMap<String, int[]>(1000);
 
     StringBuilder builder = new StringBuilder(label);
     builder.ensureCapacity(32);// make sure we have a reasonably size buffer to
                                // begin with
     List<String> ngrams  = Model.generateNGramsWithoutLabel(value.toString(), gramSize);
     for (String ngram : ngrams) {
-      Integer[] count = wordList.get(ngram);
+      int[] count = wordList.get(ngram);
       if (count == null) {
-        count = new Integer[1];
+        count = new int[1];
         count[0] = 0;
         wordList.put(ngram, count);
       }
       count[0]++;
     }
     double lengthNormalisation = 0.0;
-    for (Integer[] D_kj : wordList.values()) {
+    for (int[] D_kj : wordList.values()) {
       // key is label,word
-      lengthNormalisation += D_kj[0].doubleValue() * D_kj[0].doubleValue();
+      double dkjValue = (double) D_kj[0];
+      lengthNormalisation += dkjValue * dkjValue;
     }
     lengthNormalisation = Math.sqrt(lengthNormalisation);
 
     // Output Length Normalized + TF Transformed Frequency per Word per Class
     // Log(1 + D_ij)/SQRT( SIGMA(k, D_kj) )
-    for (Map.Entry<String, Integer[]> entry : wordList.entrySet()) {
+    for (Map.Entry<String, int[]> entry : wordList.entrySet()) {
       // key is label,word
       String token = entry.getKey();
       builder.append(',').append(token);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java?rev=712520&r1=712519&r2=712520&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/common/Model.java Sun Nov  9 09:11:00 2008
@@ -83,15 +83,16 @@
     if (!labelList.containsKey(label)) {
       Integer labelId = labelList.size();
       labelList.put(label, labelId);
+      return labelId;
     }
     return labelList.get(label);
   }
 
   protected Integer getFeature(String feature) {
     if (!featureList.containsKey(feature)) {
-      
       Integer featureId = featureList.size();
       featureList.put(feature, featureId);
+      return featureId;
     }
     return featureList.get(feature);
   }



Mime
View raw message