mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r792856 [5/13] - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/common/ main/java/org/apache/mahout/cf/taste/eval/ main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/impl/common/ main/j...
Date Fri, 10 Jul 2009 09:35:28 GMT
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java Fri Jul 10 09:35:19 2009
@@ -35,17 +35,17 @@
 
 /**
  * <p>Implements an "inverse user frequency" transformation, which boosts preference values for items for which few
- * users have expressed a preference, and reduces preference values for items for which many users have expressed
- * a preference. The idea is that these "rare" {@link Item}s are more useful in deciding how similar two users'
- * tastes are, and so should be emphasized in other calculatioons. This idea is mentioned in
- * <a href="ftp://ftp.research.microsoft.com/pub/tr/tr-98-12.pdf">Empirical Analysis of Predictive Algorithms for
+ * users have expressed a preference, and reduces preference values for items for which many users have expressed a
+ * preference. The idea is that these "rare" {@link Item}s are more useful in deciding how similar two users' tastes
+ * are, and so should be emphasized in other calculations. This idea is mentioned in <a
+ * href="ftp://ftp.research.microsoft.com/pub/tr/tr-98-12.pdf">Empirical Analysis of Predictive Algorithms for
  * Collaborative Filtering</a>.</p>
  *
- * <p>A scaling factor is computed for each {@link Item} by dividing the total number of users by the number of
- * users expressing a preference for that item, and taking the log of that value. The log base of this calculation
- * can be controlled in the constructor. Intuitively, the right value for the base is equal to the average
- * number of users who express a preference for each item in your model. If each item has about 100 preferences
- * on average, 100.0 is a good log base.</p>
+ * <p>A scaling factor is computed for each {@link Item} by dividing the total number of users by the number of users
+ * expressing a preference for that item, and taking the log of that value. The log base of this calculation can be
+ * controlled in the constructor. Intuitively, the right value for the base is equal to the average number of users who
+ * express a preference for each item in your model. If each item has about 100 preferences on average, 100.0 is a good
+ * log base.</p>
  */
 public final class InverseUserFrequency implements PreferenceTransform {
 
@@ -59,7 +59,7 @@
    * <p>Creates a {@link InverseUserFrequency} transformation. Computations use the given log base.</p>
    *
    * @param dataModel {@link DataModel} from which to calculate user frequencies
-   * @param logBase calculation logarithm base
+   * @param logBase   calculation logarithm base
    * @throws IllegalArgumentException if dataModel is <code>null</code> or logBase is {@link Double#NaN} or &lt;= 1.0
    */
   public InverseUserFrequency(DataModel dataModel, double logBase) throws TasteException {
@@ -75,9 +75,7 @@
     recompute();
   }
 
-  /**
-   * @return log base used in this object's calculations
-   */
+  /** @return log base used in this object's calculations */
   public double getLogBase() {
     return logBase;
   }
@@ -114,7 +112,7 @@
     double logFactor = Math.log(logBase);
     for (Map.Entry<Item, int[]> entry : itemPreferenceCounts.getEntrySet()) {
       newIufFactors.put(entry.getKey(),
-                        Math.log((double) numUsers / (double) entry.getValue()[0]) / logFactor);
+          Math.log((double) numUsers / (double) entry.getValue()[0]) / logFactor);
     }
     iufFactors.set(Collections.unmodifiableMap(newIufFactors));
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java Fri Jul 10 09:35:19 2009
@@ -30,15 +30,12 @@
 import java.util.Collection;
 
 /**
- * <p>Normalizes preference values for a {@link User} by converting them to
- * <a href="http://mathworld.wolfram.com/z-Score.html">"z-scores"</a>. This process
- * normalizes preference values to adjust for variation in mean and variance of a
- * user's preferences.</p>
+ * <p>Normalizes preference values for a {@link User} by converting them to <a href="http://mathworld.wolfram.com/z-Score.html">"z-scores"</a>.
+ * This process normalizes preference values to adjust for variation in mean and variance of a user's preferences.</p>
  *
- * <p>Imagine two users, one who tends to rate every movie he/she sees four or five stars,
- * and another who uses the full one to five star range when assigning ratings. This
- * transform normalizes away the difference in scale used by the two users so that both
- * have a mean preference of 0.0 and a standard deviation of 1.0.</p>
+ * <p>Imagine two users, one who tends to rate every movie he/she sees four or five stars, and another who uses the full
+ * one to five star range when assigning ratings. This transform normalizes away the difference in scale used by the two
+ * users so that both have a mean preference of 0.0 and a standard deviation of 1.0.</p>
  */
 public final class ZScore implements PreferenceTransform {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java Fri Jul 10 09:35:19 2009
@@ -23,8 +23,8 @@
 import java.util.List;
 
 /**
- * <p>Implementations represent a repository of information about {@link User}s and their
- * associated {@link Preference}s for {@link Item}s.</p>
+ * <p>Implementations represent a repository of information about {@link User}s and their associated {@link Preference}s
+ * for {@link Item}s.</p>
  */
 public interface DataModel extends Refreshable {
 
@@ -38,7 +38,8 @@
    * @param id user ID
    * @return {@link User} who has that ID
    * @throws TasteException if an error occurs while accessing the data
-   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException if there is no such {@link User}
+   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
+   *                        if there is no such {@link User}
    */
   User getUser(Object id) throws TasteException;
 
@@ -52,7 +53,8 @@
    * @param id item ID
    * @return {@link Item} that has that ID
    * @throws TasteException if an error occurs while accessing the data
-   * @throws org.apache.mahout.cf.taste.common.NoSuchItemException if there is no such {@link Item}
+   * @throws org.apache.mahout.cf.taste.common.NoSuchItemException
+   *                        if there is no such {@link Item}
    */
   Item getItem(Object id) throws TasteException;
 
@@ -65,15 +67,14 @@
 
   /**
    * @param itemID item ID
-   * @return all existing {@link Preference}s expressed for that item, ordered by {@link User},
-   *         as an array
+   * @return all existing {@link Preference}s expressed for that item, ordered by {@link User}, as an array
    * @throws TasteException if an error occurs while accessing the data
    */
   Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException;
 
   /**
-   * @return total number of {@link Item}s known to the model. This is generally the union
-   *         of all {@link Item}s preferred by at least one {@link User} but could include more.
+   * @return total number of {@link Item}s known to the model. This is generally the union of all {@link Item}s
+   *         preferred by at least one {@link User} but could include more.
    * @throws TasteException if an error occurs while accessing the data
    */
   int getNumItems() throws TasteException;
@@ -87,9 +88,9 @@
   /**
    * @param itemIDs item IDs to check for
    * @return the number of users who have expressed a preference for all of the items
-   * @throws TasteException if an error occurs while accessing the data
-   * @throws IllegalArgumentException if itemIDs is null, empty, or larger than 2 elements
-   *  since currently only queries of up to 2 items are needed and supported
+   * @throws TasteException           if an error occurs while accessing the data
+   * @throws IllegalArgumentException if itemIDs is null, empty, or larger than 2 elements since currently only queries
+   *                                  of up to 2 items are needed and supported
    */
   int getNumUsersWithPreferenceFor(Object... itemIDs) throws TasteException;
 
@@ -98,7 +99,7 @@
    *
    * @param userID user to set preference for
    * @param itemID item to set preference for
-   * @param value preference value
+   * @param value  preference value
    * @throws TasteException if an error occurs while accessing the data
    */
   void setPreference(Object userID, Object itemID, double value) throws TasteException;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Item.java Fri Jul 10 09:35:19 2009
@@ -18,21 +18,17 @@
 package org.apache.mahout.cf.taste.model;
 
 /**
- * <p>Implementations of this interface represent items that {@link User}s have
- * preferences for, and which can be recommended to them. {@link Item}s must have
- * a unique ID of some kind, and must be {@link Comparable}.</p>
+ * <p>Implementations of this interface represent items that {@link User}s have preferences for, and which can be
+ * recommended to them. {@link Item}s must have a unique ID of some kind, and must be {@link Comparable}.</p>
  */
 public interface Item extends Comparable<Item> {
 
-  /**
-   * @return unique ID for this item
-   */
+  /** @return unique ID for this item */
   Object getID();
 
   /**
-   * @return true if and only if this {@link Item} can be recommended to a user;
-   *         for example, this could be false for an {@link Item} that is no longer
-   *         available but which remains valuable for recommendation
+   * @return true if and only if this {@link Item} can be recommended to a user; for example, this could be false for an
+   *         {@link Item} that is no longer available but which remains valuable for recommendation
    */
   boolean isRecommendable();
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java Fri Jul 10 09:35:19 2009
@@ -26,8 +26,8 @@
   DataSource getDataSource();
 
   /**
-   * @param assumeExists assume the item exists; don't consult the underlying database. This is a necessary
-   * performance enhancement shortcut needed by slope one recommenders
+   * @param assumeExists assume the item exists; don't consult the underlying database. This is a necessary performance
+   *                     enhancement shortcut needed by slope one recommenders
    * @see #getItem(Object)
    */
   Item getItem(Object id, boolean assumeExists) throws TasteException;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/Preference.java Fri Jul 10 09:35:19 2009
@@ -18,25 +18,20 @@
 package org.apache.mahout.cf.taste.model;
 
 /**
- * <p>A {@link Preference} encapsulates an {@link Item} and a preference value, which
- * indicates the strength of the preference for it. {@link Preference}s are associated
- * to {@link User}s.</p>
+ * <p>A {@link Preference} encapsulates an {@link Item} and a preference value, which indicates the strength of the
+ * preference for it. {@link Preference}s are associated with {@link User}s.</p>
  */
 public interface Preference {
 
-  /**
-   * @return {@link User} who prefers the {@link Item}
-   */
+  /** @return {@link User} who prefers the {@link Item} */
   User getUser();
 
-  /**
-   * @return {@link Item} that is preferred
-   */
+  /** @return {@link Item} that is preferred */
   Item getItem();
 
   /**
-   * @return strength of the preference for that item. Zero should indicate "no preference either way";
-   *         positive values indicate preference and negative values indicate dislike
+   * @return strength of the preference for that item. Zero should indicate "no preference either way"; positive values
+   *         indicate preference and negative values indicate dislike
    */
   double getValue();
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java Fri Jul 10 09:35:19 2009
@@ -18,8 +18,8 @@
 package org.apache.mahout.cf.taste.model;
 
 /**
- * An alternate representation of an array of {@link Preference}. Implementations, in theory,
- * can produce a more memory-efficient representation. This is not used yet.
+ * An alternate representation of an array of {@link Preference}. Implementations, in theory, can produce a more
+ * memory-efficient representation. This is not used yet.
  */
 public interface PreferenceArray {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/User.java Fri Jul 10 09:35:19 2009
@@ -17,45 +17,39 @@
 
 package org.apache.mahout.cf.taste.model;
 
-/**
- * <p>Implementations represent a user, who has preferences for {@link Item}s.</p>
- */
+/** <p>Implementations represent a user, who has preferences for {@link Item}s.</p> */
 public interface User extends Comparable<User> {
 
-  /**
-   * @return unique user ID
-   */
+  /** @return unique user ID */
   Object getID();
 
   /**
    * @param itemID ID of item to get the user's preference for
-   * @return user's {@link Preference} for that {@link Item}, or <code>null</code> if the user expresses
-   *         no such preference
+   * @return user's {@link Preference} for that {@link Item}, or <code>null</code> if the user expresses no such
+   *         preference
    */
   Preference getPreferenceFor(Object itemID);
 
   /**
-   * Sets a preference that this {@link User} has. Note that in general callers should expect this to
-   * be a slow operation, compared to {@link #getPreferenceFor(Object)}.
+   * Sets a preference that this {@link User} has. Note that in general callers should expect this to be a slow
+   * operation, compared to {@link #getPreferenceFor(Object)}.
    */
   void setPreference(Item item, double value);
 
-  /**
-   * Removes a preference. This method should also be considered potentially slow.
-   */
+  /** Removes a preference. This method should also be considered potentially slow. */
   void removePreference(Object itemID);
 
   /**
-   * <p>Returns a sequence of {@link Preference}s for this {@link User} which can be iterated over.
-   * Note that the sequence <em>must</em> be "in order": ordered by {@link Item}.</p>
+   * <p>Returns a sequence of {@link Preference}s for this {@link User} which can be iterated over. Note that the
+   * sequence <em>must</em> be "in order": ordered by {@link Item}.</p>
    *
    * @return a sequence of {@link Preference}s
    */
   Iterable<Preference> getPreferences();
 
   /**
-   * <p>Returns an array view of {@link Preference}s for this {@link User}.
-   * Note that the sequence <em>must</em> be "in order": ordered by {@link Item}.</p>
+   * <p>Returns an array view of {@link Preference}s for this {@link User}. Note that the sequence <em>must</em> be "in
+   * order": ordered by {@link Item}.</p>
    *
    * @return an array of {@link Preference}s
    */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java Fri Jul 10 09:35:19 2009
@@ -24,15 +24,16 @@
 import java.util.Collection;
 
 /**
- * <p>Implementations of this interface compute a "neighborhood" of {@link User}s like a
- * given {@link User}. This neighborhood can be used to compute recommendations then.</p>
+ * <p>Implementations of this interface compute a "neighborhood" of {@link User}s like a given {@link User}. This
+ * neighborhood can then be used to compute recommendations.</p>
  */
 public interface UserNeighborhood extends Refreshable {
 
   /**
    * @param userID ID of user for which a neighborhood will be computed
    * @return {@link Collection} of {@link User}s in the neighborhood
-   * @throws org.apache.mahout.cf.taste.common.TasteException if an error occurs while accessing data
+   * @throws org.apache.mahout.cf.taste.common.TasteException
+   *          if an error occurs while accessing data
    */
   Collection<User> getUserNeighborhood(Object userID) throws TasteException;
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ClusteringRecommender.java Fri Jul 10 09:35:19 2009
@@ -22,14 +22,11 @@
 
 import java.util.Collection;
 
-/**
- * <p>Interface implemented by "clustering" recommenders.</p>
- */
+/** <p>Interface implemented by "clustering" recommenders.</p> */
 public interface ClusteringRecommender extends Recommender {
 
   /**
-   * <p>Returns the cluster of users to which the given {@link User}, denoted by user ID,
-   * belongs.</p>
+   * <p>Returns the cluster of users to which the given {@link User}, denoted by user ID, belongs.</p>
    *
    * @param userID user ID for which to find a cluster
    * @return {@link Collection} of {@link User}s in the requested user's cluster

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java Fri Jul 10 09:35:19 2009
@@ -23,13 +23,11 @@
 
 import java.util.List;
 
-/**
- * <p>Interface implemented by "item-based" recommenders.</p>
- */
+/** <p>Interface implemented by "item-based" recommenders.</p> */
 public interface ItemBasedRecommender extends Recommender {
 
   /**
-   * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
+   * @param itemID  ID of {@link Item} for which to find most similar other {@link Item}s
    * @param howMany desired number of most similar {@link Item}s to find
    * @return {@link Item}s most similar to the given item, ordered from most similar to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
@@ -37,10 +35,10 @@
   List<RecommendedItem> mostSimilarItems(Object itemID, int howMany) throws TasteException;
 
   /**
-   * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
-   * @param howMany desired number of most similar {@link Item}s to find
-   * @param rescorer {@link Rescorer} which can adjust item-item similarity
-   * estimates used to determine most similar items
+   * @param itemID   ID of {@link Item} for which to find most similar other {@link Item}s
+   * @param howMany  desired number of most similar {@link Item}s to find
+   * @param rescorer {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+   *                 items
    * @return {@link Item}s most similar to the given item, ordered from most similar to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
@@ -50,18 +48,17 @@
 
   /**
    * @param itemIDs IDs of {@link Item} for which to find most similar other {@link Item}s
-   * @param howMany desired number of most similar {@link Item}s to find
-   * estimates used to determine most similar items
+   * @param howMany desired number of most similar {@link Item}s to find
    * @return {@link Item}s most similar to the given items, ordered from most similar to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
   List<RecommendedItem> mostSimilarItems(List<Object> itemIDs, int howMany) throws TasteException;
 
   /**
-   * @param itemIDs IDs of {@link Item} for which to find most similar other {@link Item}s
-   * @param howMany desired number of most similar {@link Item}s to find
-   * @param rescorer {@link Rescorer} which can adjust item-item similarity
-   * estimates used to determine most similar items
+   * @param itemIDs  IDs of {@link Item} for which to find most similar other {@link Item}s
+   * @param howMany  desired number of most similar {@link Item}s to find
+   * @param rescorer {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar
+   *                 items
    * @return {@link Item}s most similar to the given items, ordered from most similar to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
@@ -70,21 +67,20 @@
                                          Rescorer<Pair<Item, Item>> rescorer) throws TasteException;
 
   /**
-   * <p>Lists the {@link Item}s that were most influential in recommending a given item to a given user.
-   * Exactly how this is determined is left to the implementation, but, generally this will return items
-   * that the user prefers and that are similar to the given item.</p>
+   * <p>Lists the {@link Item}s that were most influential in recommending a given item to a given user. Exactly how
+   * this is determined is left to the implementation, but, generally this will return items that the user prefers and
+   * that are similar to the given item.</p>
    *
-   * <p>This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's
-   * returning recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this
-   * class since it encapsulates an {@link Item} and value. The value here does not necessarily have
-   * a consistent interpretation or expected range; it will be higher the more influential the {@link Item}
-   * was in the recommendation.</p>
+   * <p>This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's returning
+   * recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this class since it encapsulates
+   * an {@link Item} and value. The value here does not necessarily have a consistent interpretation or expected range;
+   * it will be higher the more influential the {@link Item} was in the recommendation.</p>
    *
-   * @param userID ID of {@link org.apache.mahout.cf.taste.model.User} who was recommended the {@link Item}
-   * @param itemID ID of {@link Item} that was recommended
+   * @param userID  ID of {@link org.apache.mahout.cf.taste.model.User} who was recommended the {@link Item}
+   * @param itemID  ID of {@link Item} that was recommended
    * @param howMany maximum number of {@link Item}s to return
-   * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommended the given
-   *         {@link Item} to least
+   * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommending the given {@link
+   *         Item} to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
   List<RecommendedItem> recommendedBecause(Object userID, Object itemID, int howMany) throws TasteException;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java Fri Jul 10 09:35:19 2009
@@ -20,21 +20,17 @@
 import org.apache.mahout.cf.taste.model.Item;
 
 /**
- * <p>Implementations encapsulate items that are recommended, and include
- * the {@link org.apache.mahout.cf.taste.model.Item} recommended and a value expressing
- * the strength of the preference.</p>
+ * <p>Implementations encapsulate items that are recommended, and include the {@link
+ * org.apache.mahout.cf.taste.model.Item} recommended and a value expressing the strength of the preference.</p>
  */
 public interface RecommendedItem extends Comparable<RecommendedItem> {
 
-  /**
-   * @return the recommended {@link Item}
-   */
+  /** @return the recommended {@link Item} */
   Item getItem();
 
   /**
-   * <p>A value expressing the strength of the preference for the recommended
-   * {@link Item}. The range of the values depends on the implementation.
-   * Implementations must use larger values to express stronger preference.</p>
+   * <p>A value expressing the strength of the preference for the recommended {@link Item}. The range of the values
+   * depends on the implementation. Implementations must use larger values to express stronger preference.</p>
    *
    * @return strength of the preference
    */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java Fri Jul 10 09:35:19 2009
@@ -25,27 +25,24 @@
 import java.util.List;
 
 /**
- * <p>Implementations of this interface can recommend {@link Item}s for a
- * {@link org.apache.mahout.cf.taste.model.User}. Implementations will likely take advantage of several
- * classes in other packages here to compute this.</p>
+ * <p>Implementations of this interface can recommend {@link Item}s for a {@link org.apache.mahout.cf.taste.model.User}.
+ * Implementations will likely take advantage of several classes in other packages here to compute this.</p>
  */
 public interface Recommender extends Refreshable {
 
   /**
-   * @param userID user for which recommendations are to be computed
+   * @param userID  user for which recommendations are to be computed
    * @param howMany desired number of recommendations
-   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly
-   *         recommend to least
+   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
   List<RecommendedItem> recommend(Object userID, int howMany) throws TasteException;
 
   /**
-   * @param userID user for which recommendations are to be computed
-   * @param howMany desired number of recommendations
+   * @param userID   user for which recommendations are to be computed
+   * @param howMany  desired number of recommendations
    * @param rescorer rescoring function to apply before final list of recommendations is determined
-   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly
-   *         recommend to least
+   * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
   List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer) throws TasteException;
@@ -53,9 +50,8 @@
   /**
    * @param userID user ID whose preference is to be estimated
    * @param itemID item ID to estimate preference for
-   * @return an estimated preference if the user has not expressed a preference for the item, or else
-   *         the user's actual preference for the item. If a preference cannot be estimated, returns
-   *         {@link Double#NaN}
+   * @return an estimated preference if the user has not expressed a preference for the item, or else the user's actual
+   *         preference for the item. If a preference cannot be estimated, returns {@link Double#NaN}
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
   double estimatePreference(Object userID, Object itemID) throws TasteException;
@@ -63,7 +59,7 @@
   /**
    * @param userID user to set preference for
    * @param itemID item to set preference for
-   * @param value preference value
+   * @param value  preference value
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */
   void setPreference(Object userID, Object itemID, double value) throws TasteException;
@@ -75,9 +71,7 @@
    */
   void removePreference(Object userID, Object itemID) throws TasteException;
 
-  /**
-   * @return {@link DataModel} used by this {@link Recommender}
-   */
+  /** @return {@link DataModel} used by this {@link Recommender} */
   DataModel getDataModel();
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java Fri Jul 10 09:35:19 2009
@@ -18,20 +18,20 @@
 package org.apache.mahout.cf.taste.recommender;
 
 /**
- * <p>A {@link Rescorer} simply assigns a new "score" to a thing like an
- * {@link org.apache.mahout.cf.taste.model.Item} or {@link org.apache.mahout.cf.taste.model.User} which a {@link Recommender}
- * is considering returning as a top recommendation. It may be used to arbitrarily re-rank the results
- * according to application-specific logic before returning recommendations. For example, an application
- * may want to boost the score of items in a certain category just for one request.</p>
+ * <p>A {@link Rescorer} simply assigns a new "score" to a thing like an {@link org.apache.mahout.cf.taste.model.Item}
+ * or {@link org.apache.mahout.cf.taste.model.User} which a {@link Recommender} is considering returning as a top
+ * recommendation. It may be used to arbitrarily re-rank the results according to application-specific logic before
+ * returning recommendations. For example, an application may want to boost the score of items in a certain category
+ * just for one request.</p>
  *
- * <p>A {@link Rescorer} can also exclude a thing from consideration entirely by returning <code>true</code>
- * from {@link #isFiltered(Object)}.</p>
+ * <p>A {@link Rescorer} can also exclude a thing from consideration entirely by returning <code>true</code> from {@link
+ * #isFiltered(Object)}.</p>
  */
 public interface Rescorer<T> {
 
   /**
-   * @param thing thing ({@link org.apache.mahout.cf.taste.model.Item} or
-   * {@link org.apache.mahout.cf.taste.model.User} really) to rescore
+   * @param thing         thing ({@link org.apache.mahout.cf.taste.model.Item} or {@link org.apache.mahout.cf.taste.model.User}
+   *                      really) to rescore
    * @param originalScore original score
    * @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely
    */
@@ -39,6 +39,7 @@
 
   /**
    * Returns <code>true</code> to exclude the given thing.
+   *
    * @param thing the thing to filter
    * @return <code>true</code> to exclude, <code>false</code> otherwise
    */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java Fri Jul 10 09:35:19 2009
@@ -22,13 +22,11 @@
 
 import java.util.List;
 
-/**
- * <p>Interface implemented by "user-based" recommenders.</p>
- */
+/** <p>Interface implemented by "user-based" recommenders.</p> */
 public interface UserBasedRecommender extends Recommender {
 
   /**
-   * @param userID ID of {@link User} for which to find most similar other {@link User}s
+   * @param userID  ID of {@link User} for which to find most similar other {@link User}s
    * @param howMany desired number of most similar {@link User}s to find
    * @return {@link User}s most similar to the given user
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
@@ -36,10 +34,10 @@
   List<User> mostSimilarUsers(Object userID, int howMany) throws TasteException;
 
   /**
-   * @param userID ID of {@link User} for which to find most similar other {@link User}s
-   * @param howMany desired number of most similar {@link User}s to find
-   * @param rescorer {@link Rescorer} which can adjust user-user similarity
-   * estimates used to determine most similar users
+   * @param userID   ID of {@link User} for which to find most similar other {@link User}s
+   * @param howMany  desired number of most similar {@link User}s to find
+   * @param rescorer {@link Rescorer} which can adjust user-user similarity estimates used to determine most similar
+   *                 users
    * @return {@link User}s most similar to the given user
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
    */

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/slopeone/DiffStorage.java Fri Jul 10 09:35:19 2009
@@ -27,57 +27,44 @@
 import java.util.Set;
 
 /**
- * <p>Implementations store item-item preference diffs for a
- * {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}.
- * It actually does a bit more for this implementation, like listing all items that may be
- * considered for recommedation, in order to maximize what implementations can do
- * to optimize the slope-one algorithm.</p>
+ * <p>Implementations store item-item preference diffs for a {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}.
+ * It actually does a bit more for this implementation, like listing all items that may be considered for recommendation,
+ * in order to maximize what implementations can do to optimize the slope-one algorithm.</p>
  *
  * @see org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender
  */
 public interface DiffStorage extends Refreshable {
 
   /**
-   * @param itemID1
-   * @param itemID2
-   * @return {@link RunningAverage} encapsulating the average difference in preferences
-   *         between items corresponding to <code>itemID1</code> and <code>itemID2</code>, in that direction; that is,
-   *         it's the average of item 2's preferences minus item 1's preferences
-   * @throws TasteException
+   * @return {@link RunningAverage} encapsulating the average difference in preferences between items corresponding to
+   *         <code>itemID1</code> and <code>itemID2</code>, in that direction; that is, it's the average of item 2's
+   *         preferences minus item 1's preferences
    */
   RunningAverage getDiff(Object itemID1, Object itemID2) throws TasteException;
 
   /**
    * @param userID user ID to get diffs for
    * @param itemID itemID to assess
-   * @param prefs user's preferendces
+   * @param prefs  user's preferences
    * @return {@link List} of {@link RunningAverage} for that user's item-item diffs
-   * @throws TasteException
    */
   RunningAverage[] getDiffs(Object userID, Object itemID, Preference[] prefs) throws TasteException;
 
-  /**
-   * @param itemID
-   * @return {@link RunningAverage} encapsulating the average preference for the given item
-   * @throws TasteException
-   */
+  /** @return {@link RunningAverage} encapsulating the average preference for the given item */
   RunningAverage getAverageItemPref(Object itemID) throws TasteException;
 
   /**
    * <p>Updates internal data structures to reflect an update in a preference value for an item.</p>
    *
-   * @param itemID item to update preference value for
+   * @param itemID    item to update preference value for
    * @param prefDelta amount by which preference value changed (or its old value, if being removed
-   * @param remove if <code>true</code>, operation reflects a removal rather than change of preference
-   * @throws TasteException
+   * @param remove    if <code>true</code>, operation reflects a removal rather than change of preference
    */
   void updateItemPref(Object itemID, double prefDelta, boolean remove) throws TasteException;
 
   /**
-   * @param userID
-   * @return {@link Item}s that may possibly be recommended to the given user, which may not be all
-   *         {@link Item}s since the item-item diff matrix may be sparses
-   * @throws TasteException
+   * @return {@link Item}s that may possibly be recommended to the given user, which may not be all {@link Item}s since
+   *         the item-item diff matrix may be sparse
    */
   Set<Item> getRecommendableItems(Object userID) throws TasteException;
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java Fri Jul 10 09:35:19 2009
@@ -22,17 +22,16 @@
 import org.apache.mahout.cf.taste.model.Item;
 
 /**
- * <p>Implementations of this interface define a notion of similarity between two
- * {@link Item}s. Implementations should return values in the range -1.0 to 1.0, with
- * 1.0 representing perfect similarity.</p>
+ * <p>Implementations of this interface define a notion of similarity between two {@link Item}s. Implementations should
+ * return values in the range -1.0 to 1.0, with 1.0 representing perfect similarity.</p>
  *
  * @see UserSimilarity
  */
 public interface ItemSimilarity extends Refreshable {
 
   /**
-   * <p>Returns the degree of similarity, of two {@link Item}s, based
-   * on the preferences that {@link org.apache.mahout.cf.taste.model.User}s have expressed for the items.</p>
+   * <p>Returns the degree of similarity, of two {@link Item}s, based on the preferences that {@link
+   * org.apache.mahout.cf.taste.model.User}s have expressed for the items.</p>
    *
    * @param item1 first item
    * @param item2 second item

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java Fri Jul 10 09:35:19 2009
@@ -23,9 +23,9 @@
 import org.apache.mahout.cf.taste.model.User;
 
 /**
- * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item}
- * that the user has not expressed any preference for. This might be an average of other preferences scores
- * from that user, for example. This technique is sometimes called "default voting".</p>
+ * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item} that the
+ * user has not expressed any preference for. This might be an average of other preferences scores from that user, for
+ * example. This technique is sometimes called "default voting".</p>
  */
 public interface PreferenceInferrer extends Refreshable {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java Fri Jul 10 09:35:19 2009
@@ -22,17 +22,15 @@
 import org.apache.mahout.cf.taste.model.User;
 
 /**
- * <p>Implementations of this interface define a notion of similarity between two
- * {@link User}s. Implementations should return values in the range -1.0 to 1.0, with
- * 1.0 representing perfect similarity.</p>
+ * <p>Implementations of this interface define a notion of similarity between two {@link User}s. Implementations should
+ * return values in the range -1.0 to 1.0, with 1.0 representing perfect similarity.</p>
  *
  * @see ItemSimilarity
  */
 public interface UserSimilarity extends Refreshable {
 
   /**
-   * <p>Returns the degree of similarity, of two {@link User}s, based
-   * on the their preferences.</p>
+   * <p>Returns the degree of similarity of two {@link User}s, based on their preferences.</p>
    *
    * @param user1 first user
    * @param user2 second user

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java Fri Jul 10 09:35:19 2009
@@ -22,10 +22,10 @@
 import org.apache.mahout.cf.taste.model.Preference;
 
 /**
- * <p>Implementations encapsulate a transform on a {@link Preference}'s value. These transformations are
- * typically applied to values before they are used to compute a similarity value. They are typically not
- * applied elsewhere; in particular {@link org.apache.mahout.cf.taste.model.DataModel}s no longer use a transform
- * like this to transform all of their preference values at the source.</p>
+ * <p>Implementations encapsulate a transform on a {@link Preference}'s value. These transformations are typically
+ * applied to values before they are used to compute a similarity value. They are typically not applied elsewhere; in
+ * particular {@link org.apache.mahout.cf.taste.model.DataModel}s no longer use a transform like this to transform all
+ * of their preference values at the source.</p>
  */
 public interface PreferenceTransform extends Refreshable {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java Fri Jul 10 09:35:19 2009
@@ -20,16 +20,14 @@
 import org.apache.mahout.cf.taste.common.Refreshable;
 
 /**
- * <p>Implementations encapsulate some transformation on similarity values between two
- * things, where things might be {@link org.apache.mahout.cf.taste.model.User}s or
- * {@link org.apache.mahout.cf.taste.model.Item}s or
- * something else.</p>
+ * <p>Implementations encapsulate some transformation on similarity values between two things, where things might be
+ * {@link org.apache.mahout.cf.taste.model.User}s or {@link org.apache.mahout.cf.taste.model.Item}s or something
+ * else.</p>
  */
 public interface SimilarityTransform<T> extends Refreshable {
 
   /**
-   * @param value original similarity between thing1 and thing2
-   * (should be in [-1,1])
+   * @param value original similarity between thing1 and thing2 (should be in [-1,1])
    * @return transformed similarity (should be in [-1,1])
    */
   double transformSimilarity(T thing1, T thing2, double value);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/BayesFileFormatter.java Fri Jul 10 09:35:19 2009
@@ -17,13 +17,13 @@
 
 package org.apache.mahout.classifier;
 
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
-import org.apache.commons.cli2.builder.ArgumentBuilder;
-import org.apache.commons.cli2.builder.GroupBuilder;
-import org.apache.commons.cli2.Option;
 import org.apache.commons.cli2.CommandLine;
 import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
 import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
@@ -32,6 +32,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.Closeable;
 import java.io.File;
 import java.io.FileFilter;
 import java.io.FileInputStream;
@@ -41,15 +42,13 @@
 import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.Writer;
-import java.io.Closeable;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 
 /**
- * Flatten a file into format that can be read by the Bayes M/R job. <p/> One
- * document per line, first token is the label followed by a tab, rest of the
- * line are the terms.
+ * Flatten a file into format that can be read by the Bayes M/R job. <p/> One document per line, first token is the
+ * label followed by a tab, rest of the line are the terms.
  */
 public class BayesFileFormatter {
 
@@ -61,18 +60,16 @@
   }
 
   /**
-   * Collapse all the files in the inputDir into a single file in the proper
-   * Bayes format, 1 document per line
-   * 
-   * @param label The label
-   * @param analyzer The analyzer to use
-   * @param inputDir The input Directory
-   * @param charset The charset of the input files
+   * Collapse all the files in the inputDir into a single file in the proper Bayes format, 1 document per line
+   *
+   * @param label      The label
+   * @param analyzer   The analyzer to use
+   * @param inputDir   The input Directory
+   * @param charset    The charset of the input files
    * @param outputFile The file to collapse to
-   * @throws java.io.IOException
    */
   public static void collapse(String label, Analyzer analyzer, File inputDir,
-      Charset charset, File outputFile) throws IOException {
+                              Charset charset, File outputFile) throws IOException {
     Writer writer = new OutputStreamWriter(new FileOutputStream(outputFile),
         charset);
     try {
@@ -85,17 +82,15 @@
 
   /**
    * Write the input files to the outdir, one output file per input file
-   * 
-   * @param label The label of the file
+   *
+   * @param label    The label of the file
    * @param analyzer The analyzer to use
-   * @param input The input file or directory. May not be null
-   * @param charset The Character set of the input files
-   * @param outDir The output directory. Files will be written there with the
-   *        same name as the input file
-   * @throws IOException
+   * @param input    The input file or directory. May not be null
+   * @param charset  The Character set of the input files
+   * @param outDir   The output directory. Files will be written there with the same name as the input file
    */
   public static void format(String label, Analyzer analyzer, File input,
-      Charset charset, File outDir) throws IOException {
+                            Charset charset, File outDir) throws IOException {
     if (input.isDirectory()) {
       input.listFiles(new FileProcessor(label, analyzer, charset, outDir));
     } else {
@@ -110,8 +105,8 @@
   }
 
   /**
-   * Hack the FileFilter mechanism so that we don't get stuck on large
-   * directories and don't have to loop the list twice
+   * Hack the FileFilter mechanism so that we don't get stuck on large directories and don't have to loop the list
+   * twice
    */
   private static class FileProcessor implements FileFilter {
     private final String label;
@@ -126,14 +121,12 @@
 
     /**
      * Use this when you want to collapse all files to a single file
-     * 
-     * @param label The label
-     * @param analyzer
-     * @param charset
+     *
+     * @param label  The label
      * @param writer must not be null and will not be closed
      */
     private FileProcessor(String label, Analyzer analyzer, Charset charset,
-        Writer writer) {
+                          Writer writer) {
       this.label = label;
       this.analyzer = analyzer;
       this.charset = charset;
@@ -142,14 +135,11 @@
 
     /**
      * Use this when you want a writer per file
-     * 
-     * @param label
-     * @param analyzer
-     * @param charset
+     *
      * @param outputDir must not be null.
      */
     private FileProcessor(String label, Analyzer analyzer, Charset charset,
-        File outputDir) {
+                          File outputDir) {
       this.label = label;
       this.analyzer = analyzer;
       this.charset = charset;
@@ -189,16 +179,16 @@
 
   /**
    * Write the tokens and the label from the Reader to the writer
-   * 
-   * @param label The label
+   *
+   * @param label    The label
    * @param analyzer The analyzer to use
-   * @param inFile the file to read and whose contents are passed to the analyzer
-   * @param charset character encoding to assume when reading the input file
-   * @param writer The Writer, is not closed by this method
+   * @param inFile   the file to read and whose contents are passed to the analyzer
+   * @param charset  character encoding to assume when reading the input file
+   * @param writer   The Writer, is not closed by this method
    * @throws java.io.IOException if there was a problem w/ the reader
    */
   private static void writeFile(String label, Analyzer analyzer, File inFile,
-      Charset charset, Writer writer) throws IOException {
+                                Charset charset, Writer writer) throws IOException {
     Reader reader = new InputStreamReader(new FileInputStream(inFile), charset);
     try {
       TokenStream ts = analyzer.tokenStream(label, reader);
@@ -229,11 +219,10 @@
 
   /**
    * Convert a Reader to a vector
-   * 
+   *
    * @param analyzer The Analyzer to use
-   * @param reader The reader to feed to the Analyzer
+   * @param reader   The reader to feed to the Analyzer
    * @return An array of unique tokens
-   * @throws IOException
    */
   public static String[] readerToDocument(Analyzer analyzer, Reader reader)
       throws IOException {
@@ -252,45 +241,45 @@
 
   /**
    * Run the FileFormatter
-   * 
+   *
    * @param args The input args. Run with -h to see the help
    * @throws ClassNotFoundException if the Analyzer can't be found
    * @throws IllegalAccessException if the Analyzer can't be constructed
    * @throws InstantiationException if the Analyzer can't be constructed
-   * @throws IOException if the files can't be dealt with properly
+   * @throws IOException            if the files can't be dealt with properly
    */
-   public static void main(String[] args) throws ClassNotFoundException,
+  public static void main(String[] args) throws ClassNotFoundException,
       IllegalAccessException, InstantiationException, IOException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
     Option inputOpt = obuilder.withLongName("input").withRequired(true).withArgument(
-            abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Input file").withShortName("i").create();
+        abuilder.withName("input").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Input file").withShortName("i").create();
 
     Option outputOpt = obuilder.withLongName("output").withRequired(true).withArgument(
-            abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
-            withDescription("The output file").withShortName("o").create();
+        abuilder.withName("output").withMinimum(1).withMaximum(1).create()).
+        withDescription("The output file").withShortName("o").create();
 
     Option labelOpt = obuilder.withLongName("label").withRequired(true).withArgument(
-            abuilder.withName("label").withMinimum(1).withMaximum(1).create()).
-            withDescription("The label of the file").withShortName("l").create();
+        abuilder.withName("label").withMinimum(1).withMaximum(1).create()).
+        withDescription("The label of the file").withShortName("l").create();
 
     Option analyzerOpt = obuilder.withLongName("analyzer").withArgument(
-            abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
-            withDescription("The fully qualified class name of the analyzer to use.  Must have a no-arg constructor.  Default is the StandardAnalyzer").withShortName("a").create();
+        abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
+        withDescription("The fully qualified class name of the analyzer to use.  Must have a no-arg constructor.  Default is the StandardAnalyzer").withShortName("a").create();
 
     Option charsetOpt = obuilder.withLongName("charset").withArgument(
-            abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).
-            withDescription("The character encoding of the input file").withShortName("c").create();
+        abuilder.withName("charset").withMinimum(1).withMaximum(1).create()).
+        withDescription("The character encoding of the input file").withShortName("c").create();
 
     Option collapseOpt = obuilder.withLongName("collapse").withRequired(true).withArgument(
-            abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()).
-            withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
+        abuilder.withName("collapse").withMinimum(1).withMaximum(1).create()).
+        withDescription("Collapse a whole directory to a single file, one doc per line").withShortName("p").create();
 
     Option helpOpt = obuilder.withLongName("help").withRequired(true).
-            withDescription("Print out help").withShortName("h").create();
+        withDescription("Print out help").withShortName("h").create();
     Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(labelOpt).withOption(analyzerOpt).withOption(charsetOpt).withOption(collapseOpt).withOption(helpOpt).create();
     try {
       Parser parser = new Parser();
@@ -298,7 +287,7 @@
       CommandLine cmdLine = parser.parse(args);
 
       if (cmdLine.hasOption(helpOpt)) {
-        
+
         return;
       }
       File input = new File((String) cmdLine.getValue(inputOpt));
@@ -307,7 +296,7 @@
       Analyzer analyzer;
       if (cmdLine.hasOption(analyzerOpt)) {
         analyzer = Class.forName(
-                (String) cmdLine.getValue(analyzerOpt)).asSubclass(Analyzer.class).newInstance();
+            (String) cmdLine.getValue(analyzerOpt)).asSubclass(Analyzer.class).newInstance();
       } else {
         analyzer = new StandardAnalyzer();
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ClassifierResult.java Fri Jul 10 09:35:19 2009
@@ -17,9 +17,7 @@
 
 package org.apache.mahout.classifier;
 
-/**
- * Result of a Document Classification. The label and the associated score(Usually probabilty)
- */
+/** Result of a Document Classification. The label and the associated score (usually probability) */
 public class ClassifierResult {
   private String label;
   private double score;
@@ -38,15 +36,15 @@
 
   public String getLabel() {
     return label;
-  }  
+  }
 
   public double getScore() {
     return score;
   }
-  
+
   public void setLabel(String label) {
     this.label = label;
-  }  
+  }
 
   public void setScore(double score) {
     this.score = score;
@@ -55,8 +53,8 @@
   @Override
   public String toString() {
     return "ClassifierResult{" +
-            "category='" + label + '\'' +
-            ", score=" + score +
-            '}';
+        "category='" + label + '\'' +
+        ", score=" + score +
+        '}';
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java Fri Jul 10 09:35:19 2009
@@ -18,14 +18,13 @@
 package org.apache.mahout.classifier;
 
 import org.apache.commons.cli2.CommandLine;
-import org.apache.commons.cli2.Option;
 import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
 import org.apache.commons.cli2.OptionException;
-import org.apache.commons.cli2.commandline.Parser;
-import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
-
+import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.JobConf;
@@ -42,12 +41,12 @@
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.nio.charset.Charset;
 
 public class Classify {
 
@@ -57,37 +56,37 @@
   }
 
   public static void main(String[] args)
-          throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, OptionException {
+      throws IOException, ClassNotFoundException, IllegalAccessException, InstantiationException, OptionException {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
 
     Option pathOpt = obuilder.withLongName("path").withRequired(true).withArgument(
-            abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("The local file system path").withShortName("p").create();
+        abuilder.withName("path").withMinimum(1).withMaximum(1).create()).withDescription("The local file system path").withShortName("p").create();
 
     Option classifyOpt = obuilder.withLongName("classify").withRequired(true).withArgument(
-            abuilder.withName("classify").withMinimum(1).withMaximum(1).create()).
-            withDescription("The doc to classify").withShortName("").create();
+        abuilder.withName("classify").withMinimum(1).withMaximum(1).create()).
+        withDescription("The doc to classify").withShortName("").create();
 
     Option encodingOpt = obuilder.withLongName("encoding").withRequired(true).withArgument(
-            abuilder.withName("encoding").withMinimum(1).withMaximum(1).create()).
-            withDescription("The file encoding.  Default: UTF-8").withShortName("e").create();
+        abuilder.withName("encoding").withMinimum(1).withMaximum(1).create()).
+        withDescription("The file encoding.  Default: UTF-8").withShortName("e").create();
 
     Option analyzerOpt = obuilder.withLongName("analyzer").withRequired(true).withArgument(
-            abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
-            withDescription("The Analyzer to use").withShortName("a").create();
+        abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()).
+        withDescription("The Analyzer to use").withShortName("a").create();
 
     Option defaultCatOpt = obuilder.withLongName("defaultCat").withRequired(true).withArgument(
-            abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create()).
-            withDescription("The default category").withShortName("d").create();
+        abuilder.withName("defaultCat").withMinimum(1).withMaximum(1).create()).
+        withDescription("The default category").withShortName("d").create();
 
     Option gramSizeOpt = obuilder.withLongName("gramSize").withRequired(true).withArgument(
-            abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).
-            withDescription("Size of the n-gram").withShortName("ng").create();
+        abuilder.withName("gramSize").withMinimum(1).withMaximum(1).create()).
+        withDescription("Size of the n-gram").withShortName("ng").create();
 
     Option typeOpt = obuilder.withLongName("classifierType").withRequired(true).withArgument(
-            abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()).
-            withDescription("Type of classifier").withShortName("type").create();
+        abuilder.withName("classifierType").withMinimum(1).withMaximum(1).create()).
+        withDescription("Type of classifier").withShortName("type").create();
 
     Group options = gbuilder.withName("Options").withOption(pathOpt).withOption(classifyOpt).withOption(encodingOpt).withOption(analyzerOpt).withOption(defaultCatOpt).withOption(gramSizeOpt).withOption(typeOpt).create();
 
@@ -159,11 +158,10 @@
     log.info("Converting input document to proper format");
     String[] document = BayesFileFormatter.readerToDocument(analyzer, new InputStreamReader(new FileInputStream(docPath), Charset.forName(encoding)));
     StringBuilder line = new StringBuilder();
-    for(String token : document)
-    {
+    for (String token : document) {
       line.append(token).append(' ');
     }
-    List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize) ;
+    List<String> doc = Model.generateNGramsWithoutLabel(line.toString(), gramSize);
     log.info("Done converting");
     log.info("Classifying document: {}", docPath);
     ClassifierResult category = classifier.classify(model, doc.toArray(new String[doc.size()]), defaultCat);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ConfusionMatrix.java Fri Jul 10 09:35:19 2009
@@ -56,11 +56,11 @@
     return labels;
   }
 
-  public double getAccuracy(String label){
+  public double getAccuracy(String label) {
     int labelId = labelMap.get(label);
     int labelTotal = 0;
     int correct = 0;
-    for(int i = 0 ;i < labels.size() ;i++){
+    for (int i = 0; i < labels.size(); i++) {
       labelTotal += confusionMatrix[labelId][i];
       if (i == labelId) {
         correct = confusionMatrix[labelId][i];
@@ -69,33 +69,33 @@
     return 100.0 * correct / labelTotal;
   }
 
-  public int getCorrect(String label){
+  public int getCorrect(String label) {
     int labelId = labelMap.get(label);
     return confusionMatrix[labelId][labelId];
   }
 
 
-  public double getTotal(String label){
+  public double getTotal(String label) {
     int labelId = labelMap.get(label);
     int labelTotal = 0;
-    for (int i = 0 ;i < labels.size() ;i++){
-      labelTotal+= confusionMatrix[labelId][i];
+    for (int i = 0; i < labels.size(); i++) {
+      labelTotal += confusionMatrix[labelId][i];
     }
     return labelTotal;
   }
 
   public void addInstance(String correctLabel, ClassifierResult classifiedResult) {
     incrementCount(correctLabel, classifiedResult.getLabel());
-  }  
-  
+  }
+
   public void addInstance(String correctLabel, String classifiedLabel) {
     incrementCount(correctLabel, classifiedLabel);
   }
-  
+
   public int getCount(String correctLabel, String classifiedLabel) {
     if (labels.contains(correctLabel)
         && labels.contains(classifiedLabel) == false && defaultLabel.equals(classifiedLabel) == false) {
-      throw new IllegalArgumentException("Label not found " +correctLabel + ' ' +classifiedLabel );
+      throw new IllegalArgumentException("Label not found " + correctLabel + ' ' + classifiedLabel);
     }
     int correctId = labelMap.get(correctLabel);
     int classifiedId = labelMap.get(classifiedLabel);
@@ -113,7 +113,7 @@
   }
 
   public void incrementCount(String correctLabel, String classifiedLabel,
-      int count) {
+                             int count) {
     putCount(correctLabel, classifiedLabel, count
         + getCount(correctLabel, classifiedLabel));
   }
@@ -123,8 +123,9 @@
   }
 
   public ConfusionMatrix merge(ConfusionMatrix b) {
-    if (labels.size() != b.getLabels().size())
+    if (labels.size() != b.getLabels().size()) {
       throw new IllegalArgumentException("The Labels do not Match");
+    }
 
     //if (labels.containsAll(b.getLabels()))
     //  ;
@@ -172,12 +173,12 @@
   static String getSmallLabel(int i) {
     int val = i;
     StringBuilder returnString = new StringBuilder();
-    do{
+    do {
       int n = val % 26;
       int c = 'a';
-      returnString.insert(0, (char)(c + n));
+      returnString.insert(0, (char) (c + n));
       val /= 26;
-    }while(val>0);
+    } while (val > 0);
     return returnString.toString();
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/ResultAnalyzer.java Fri Jul 10 09:35:19 2009
@@ -43,14 +43,16 @@
     confusionMatrix = new ConfusionMatrix(labelSet, defaultLabel);
   }
 
-  public ConfusionMatrix getConfusionMatrix(){
+  public ConfusionMatrix getConfusionMatrix() {
     return this.confusionMatrix;
   }
+
   public void addInstance(String correctLabel, ClassifierResult classifiedResult) {
-    if (correctLabel.equals(classifiedResult.getLabel()))
+    if (correctLabel.equals(classifiedResult.getLabel())) {
       correctlyClassified++;
-    else
+    } else {
       incorrectlyClassified++;
+    }
     confusionMatrix.addInstance(correctLabel, classifiedResult);
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesClassifier.java Fri Jul 10 09:35:19 2009
@@ -23,15 +23,12 @@
 import org.apache.mahout.common.Model;
 
 import java.util.Collection;
+import java.util.Deque;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.Map;
-import java.util.Deque;
-
 
-/**
- * Classifies documents based on a {@link BayesModel}}.  
- */
+/** Classifies documents based on a {@link BayesModel}. */
 public class BayesClassifier implements Classifier {
 
   /**
@@ -40,17 +37,17 @@
    * @param model           The model
    * @param document        The document to classify
    * @param defaultCategory The default category to assign
-   * @param numResults      The maximum number of results to return, ranked by score.
-   *                        Ties are broken by comparing the category
-   * @return A Collection of {@link org.apache.mahout.classifier.ClassifierResult}s.
+   * @param numResults      The maximum number of results to return, ranked by score. Ties are broken by comparing the
+   *                        category
+   * @return A Collection of {@link ClassifierResult}s.
    */
   @Override
   public Collection<ClassifierResult> classify(Model model, String[] document, String defaultCategory, int numResults) {
     Collection<String> categories = model.getLabels();
-    
+
     PriorityQueue<ClassifierResult> pq = new ClassifierResultPriorityQueue(numResults);
     ClassifierResult tmp;
-    for (String category : categories){
+    for (String category : categories) {
       double prob = documentWeight(model, category, document);
       if (prob > 0.0) {
         tmp = new ClassifierResult(category, prob);
@@ -62,7 +59,7 @@
     while ((tmp = pq.pop()) != null) {
       result.addLast(tmp);
     }
-    if (result.isEmpty()){
+    if (result.isEmpty()) {
       result.add(new ClassifierResult(defaultCategory, 0));
     }
     return result;
@@ -94,12 +91,12 @@
   }
 
   /**
-   * Calculate the document weight as the multiplication of the
-   * {@link org.apache.mahout.common.Model#featureWeight(String, String)} for each word given the label
+   * Calculate the document weight as the multiplication of the {@link Model#featureWeight(String,
+   * String)} for each word given the label
    *
-   * @param model       The {@link org.apache.mahout.common.Model}
-   * @param label       The label to calculate the probability of
-   * @param document    The document
+   * @param model    The {@link Model}
+   * @param label    The label to calculate the probability of
+   * @param document The document
    * @return The probability
    * @see Model#featureWeight(String, String)
    */
@@ -109,7 +106,7 @@
     for (String word : document) {
       int[] count = wordList.get(word);
       if (count == null) {
-        count = new int[] { 0 };
+        count = new int[]{0};
         wordList.put(word, count);
       }
       count[0]++;
@@ -123,7 +120,6 @@
     return result;
   }
 
-  
   private static class ClassifierResultPriorityQueue extends PriorityQueue<ClassifierResult> {
 
     private ClassifierResultPriorityQueue(int numResults) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesDriver.java Fri Jul 10 09:35:19 2009
@@ -28,9 +28,7 @@
 
 import java.io.IOException;
 
-/**
- * Create and run the Bayes Trainer.
- */
+/** Create and run the Bayes Trainer. */
 public class BayesDriver {
 
   private static final Logger log = LoggerFactory.getLogger(BayesDriver.class);
@@ -39,15 +37,10 @@
   }
 
   /**
-   * Takes in two arguments:
-   * <ol>
-   * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents
-   * live</li>
-   * <li>The output {@link org.apache.hadoop.fs.Path} where to write the
-   * {@link org.apache.mahout.common.Model} as a
-   * {@link org.apache.hadoop.io.SequenceFile}</li>
-   * </ol>
-   * 
+   * Takes in two arguments: <ol> <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live</li>
+   * <li>The output {@link org.apache.hadoop.fs.Path} where to write the {@link org.apache.mahout.common.Model} as a
+   * {@link org.apache.hadoop.io.SequenceFile}</li> </ol>
+   *
    * @param args The args
    */
   public static void main(String[] args) throws IOException {
@@ -59,16 +52,17 @@
 
   /**
    * Run the job
-   * 
-   * @param input the input pathname String
+   *
+   * @param input  the input pathname String
    * @param output the output pathname String
    */
   public static void runJob(String input, String output, int gramSize) throws IOException {
     JobConf conf = new JobConf(BayesDriver.class);
     Path outPath = new Path(output);
     FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
-    if (dfs.exists(outPath))
+    if (dfs.exists(outPath)) {
       dfs.delete(outPath, true);
+    }
 
     log.info("Reading features...");
     //Read the features in each document normalized by length of each document
@@ -92,30 +86,36 @@
     //Calculate the normalization factor Sigma_W_ij for each complement class.
     //CBayesNormalizedWeightDriver.runJob(input, output);
 
-    Path docCountOutPath = new Path(output+ "/trainer-docCount");
-    if (dfs.exists(docCountOutPath))
+    Path docCountOutPath = new Path(output + "/trainer-docCount");
+    if (dfs.exists(docCountOutPath)) {
       dfs.delete(docCountOutPath, true);
-    Path termDocCountOutPath = new Path(output+ "/trainer-termDocCount");
-    if (dfs.exists(termDocCountOutPath))
+    }
+    Path termDocCountOutPath = new Path(output + "/trainer-termDocCount");
+    if (dfs.exists(termDocCountOutPath)) {
       dfs.delete(termDocCountOutPath, true);
-    Path featureCountOutPath = new Path(output+ "/trainer-featureCount");
-    if (dfs.exists(featureCountOutPath))
+    }
+    Path featureCountOutPath = new Path(output + "/trainer-featureCount");
+    if (dfs.exists(featureCountOutPath)) {
       dfs.delete(featureCountOutPath, true);
-    Path wordFreqOutPath = new Path(output+ "/trainer-wordFreq");
-    if (dfs.exists(wordFreqOutPath))
+    }
+    Path wordFreqOutPath = new Path(output + "/trainer-wordFreq");
+    if (dfs.exists(wordFreqOutPath)) {
       dfs.delete(wordFreqOutPath, true);
-    Path vocabCountPath = new Path(output+ "/trainer-tfIdf/trainer-vocabCount");
-    if (dfs.exists(vocabCountPath))
+    }
+    Path vocabCountPath = new Path(output + "/trainer-tfIdf/trainer-vocabCount");
+    if (dfs.exists(vocabCountPath)) {
       dfs.delete(vocabCountPath, true);
+    }
     /*Path tfIdfOutPath = new Path(output+ "/trainer-tfIdf");
     if (dfs.exists(tfIdfOutPath))
       dfs.delete(tfIdfOutPath, true);*/
-    Path vocabCountOutPath = new Path(output+ "/trainer-vocabCount");
-    if (dfs.exists(vocabCountOutPath))
+    Path vocabCountOutPath = new Path(output + "/trainer-vocabCount");
+    if (dfs.exists(vocabCountOutPath)) {
       dfs.delete(vocabCountOutPath, true);
-   /* Path weightsOutPath = new Path(output+ "/trainer-weights");
-    if (dfs.exists(weightsOutPath))
-      dfs.delete(weightsOutPath, true);*/
+    }
+    /* Path weightsOutPath = new Path(output+ "/trainer-weights");
+ if (dfs.exists(weightsOutPath))
+   dfs.delete(weightsOutPath, true);*/
     /*Path thetaOutPath = new Path(output+ "/trainer-theta");
     if (dfs.exists(thetaOutPath))
       dfs.delete(thetaOutPath, true);*/

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java?rev=792856&r1=792855&r2=792856&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/BayesModel.java Fri Jul 10 09:35:19 2009
@@ -30,21 +30,20 @@
   @Override
   protected double getWeight(Integer label, Integer feature) {
     double result = 0.0;
-    Map<Integer,Double> featureWeights = featureLabelWeights.get(feature);
+    Map<Integer, Double> featureWeights = featureLabelWeights.get(feature);
+
 
-    
     if (featureWeights.containsKey(label)) {
       result = featureWeights.get(label).floatValue();
     }
-    
+
     double vocabCount = featureList.size();
     double sumLabelWeight = getSumLabelWeight(label);
 
+    double numerator = result + alpha_i;
+    double denominator = (sumLabelWeight + vocabCount);
 
-    double numerator =  result + alpha_i;
-    double denominator =(sumLabelWeight + vocabCount);
-    
-    double weight = Math.log(numerator /denominator);
+    double weight = Math.log(numerator / denominator);
     result = -weight;
 
     return result;
@@ -53,7 +52,7 @@
   @Override
   protected double getWeightUnprocessed(Integer label, Integer feature) {
     double result;
-    Map<Integer,Double> featureWeights = featureLabelWeights.get(feature);
+    Map<Integer, Double> featureWeights = featureLabelWeights.get(feature);
 
     if (featureWeights.containsKey(label)) {
       result = featureWeights.get(label);
@@ -85,65 +84,67 @@
 
   @Override
   public void generateModel() {
-      double vocabCount = featureList.size();
+    double vocabCount = featureList.size();
 
-      double[] perLabelThetaNormalizer = new double[labelList.size()];
+    double[] perLabelThetaNormalizer = new double[labelList.size()];
 
     for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
-        Integer featureInt = feature;
-        for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
+      Integer featureInt = feature;
+      for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
 
-          Integer labelInt = label;
-          double D_ij = getWeightUnprocessed(labelInt, featureInt);
-          double sumLabelWeight = getSumLabelWeight(labelInt);
-          //double sigma_j = getSumFeatureWeight(featureInt);
+        Integer labelInt = label;
+        double D_ij = getWeightUnprocessed(labelInt, featureInt);
+        double sumLabelWeight = getSumLabelWeight(labelInt);
+        //double sigma_j = getSumFeatureWeight(featureInt);
 
-          double numerator = D_ij + alpha_i;
-          double denominator = sumLabelWeight + vocabCount;
+        double numerator = D_ij + alpha_i;
+        double denominator = sumLabelWeight + vocabCount;
 
-          double weight = Math.log(numerator / denominator);
+        double weight = Math.log(numerator / denominator);
 
-          if (D_ij != 0)
-            setWeight(labelInt, featureInt, weight);
+        if (D_ij != 0) {
+          setWeight(labelInt, featureInt, weight);
+        }
 
-          perLabelThetaNormalizer[label] += weight;
+        perLabelThetaNormalizer[label] += weight;
 
-        }
       }
-      log.info("Normalizing Weights");
+    }
+    log.info("Normalizing Weights");
     double perLabelWeightSumNormalisationFactor = Double.MAX_VALUE;
     for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
-        double Sigma_W_ij = perLabelThetaNormalizer[label];
-        if (perLabelWeightSumNormalisationFactor > Math.abs(Sigma_W_ij)) {
-          perLabelWeightSumNormalisationFactor = Math.abs(Sigma_W_ij);
-        }
+      double Sigma_W_ij = perLabelThetaNormalizer[label];
+      if (perLabelWeightSumNormalisationFactor > Math.abs(Sigma_W_ij)) {
+        perLabelWeightSumNormalisationFactor = Math.abs(Sigma_W_ij);
       }
+    }
 
-      for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
-        double Sigma_W_ij = perLabelThetaNormalizer[label];
-        perLabelThetaNormalizer[label] = Sigma_W_ij
-            / perLabelWeightSumNormalisationFactor;
-      }
+    for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
+      double Sigma_W_ij = perLabelThetaNormalizer[label];
+      perLabelThetaNormalizer[label] = Sigma_W_ij
+          / perLabelWeightSumNormalisationFactor;
+    }
 
-      for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
-        Integer featureInt = feature;
-        for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
-          Integer labelInt = label;
-          double W_ij = getWeightUnprocessed(labelInt, featureInt);
-          if (W_ij == 0)
-            continue;
-          double Sigma_W_ij = perLabelThetaNormalizer[label];
-          double normalizedWeight = -W_ij / Sigma_W_ij;
-          setWeight(labelInt, featureInt, normalizedWeight);
+    for (int feature = 0, maxFeatures = featureList.size(); feature < maxFeatures; feature++) {
+      Integer featureInt = feature;
+      for (int label = 0, maxLabels = labelList.size(); label < maxLabels; label++) {
+        Integer labelInt = label;
+        double W_ij = getWeightUnprocessed(labelInt, featureInt);
+        if (W_ij == 0) {
+          continue;
         }
+        double Sigma_W_ij = perLabelThetaNormalizer[label];
+        double normalizedWeight = -W_ij / Sigma_W_ij;
+        setWeight(labelInt, featureInt, normalizedWeight);
       }
+    }
 
   }
 
   /**
    * Get the weighted probability of the feature.
-   * 
-   * @param label The label of the feature
+   *
+   * @param label   The label of the feature
    * @param feature The feature to calc. the prob. for
    * @return The weighted probability
    */



Mime
View raw message