mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r993365 [3/3] - in /mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/classifier/bayes/ core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/ core/src/main/java/org/ap...
Date Tue, 07 Sep 2010 13:54:24 GMT
Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/PoissonSlow.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/PoissonSlow.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/PoissonSlow.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/PoissonSlow.java Tue Sep  7 13:54:21 2010
@@ -8,32 +8,28 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.jet.random;
 
-import org.apache.mahout.common.RandomUtils;
-
 import java.util.Random;
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
 public class PoissonSlow extends AbstractDiscreteDistribution {
 
-  private double mean;
-
-  // precomputed and cached values (for performance only)
-  private double cached_sq;
-  private double cached_alxm;
-  private double cached_g;
-
   private static final double MEAN_MAX = Integer.MAX_VALUE;
   // for all means larger than that, we don't try to compute a poisson deviation, but return the mean.
   private static final double SWITCH_MEAN = 12.0; // switch from method A to method B
 
-  private static final double[] cof = { // for method logGamma()
-      76.18009172947146, -86.50532032941677,
-      24.01409824083091, -1.231739572450155,
-      0.1208650973866179e-2, -0.5395239384953e-5};
+  // for method logGamma()
+  private static final double[] COF = {
+    76.18009172947146, -86.50532032941677,
+    24.01409824083091, -1.231739572450155,
+    0.1208650973866179e-2, -0.5395239384953e-5};
+
+  private double mean;
 
-  // The uniform random number generated shared by all <b>static</b> methods.
-  private static final PoissonSlow shared = new PoissonSlow(0.0, RandomUtils.getRandom());
+  // precomputed and cached values (for performance only)
+  private double cachedSq;
+  private double cachedAlxm;
+  private double cachedG;
 
   /** Constructs a poisson distribution. Example: mean=1.0. */
   public PoissonSlow(double mean, Random randomGenerator) {
@@ -51,7 +47,7 @@ public class PoissonSlow extends Abstrac
     tmp -= (x + 0.5) * Math.log(tmp);
     double ser = 1.000000000190015;
 
-    double[] coeff = cof;
+    double[] coeff = COF;
     for (int j = 0; j <= 5; j++) {
       x++;
       ser += coeff[j] / x;
@@ -64,7 +60,7 @@ public class PoissonSlow extends Abstrac
     /*
     * Adapted from "Numerical Recipes in C".
     */
-    double g = this.cached_g;
+    double g = this.cachedG;
 
     if (mean == -1.0) {
       return 0;
@@ -81,8 +77,8 @@ public class PoissonSlow extends Abstrac
     } else if (mean < MEAN_MAX) {
       double t;
       double em;
-      double sq = this.cached_sq;
-      double alxm = this.cached_alxm;
+      double sq = this.cachedSq;
+      double alxm = this.cachedAlxm;
 
       Random rand = this.randomGenerator;
       do {
@@ -91,7 +87,7 @@ public class PoissonSlow extends Abstrac
           y = Math.tan(Math.PI * rand.nextDouble());
           em = sq * y + mean;
         } while (em < 0.0);
-        em = (double) (int) (em); // faster than em = Math.floor(em); (em>=0.0)
+        em = (double) (int) em; // faster than em = Math.floor(em); (em>=0.0)
         t = 0.9 * (1.0 + y * y) * Math.exp(em * alxm - logGamma(em + 1.0) - g);
       } while (rand.nextDouble() > t);
       return (int) em;
@@ -122,11 +118,11 @@ public class PoissonSlow extends Abstrac
         return;
       } // not defined
       if (mean < SWITCH_MEAN) {
-        this.cached_g = Math.exp(-mean);
+        this.cachedG = Math.exp(-mean);
       } else {
-        this.cached_sq = Math.sqrt(2.0 * mean);
-        this.cached_alxm = Math.log(mean);
-        this.cached_g = mean * cached_alxm - logGamma(mean + 1.0);
+        this.cachedSq = Math.sqrt(2.0 * mean);
+        this.cachedAlxm = Math.log(mean);
+        this.cachedG = mean * cachedAlxm - logGamma(mean + 1.0);
       }
     }
   }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/StudentT.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/StudentT.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/StudentT.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/StudentT.java Tue Sep  7 13:54:21 2010
@@ -9,7 +9,6 @@ It is provided "as is" without expressed
 package org.apache.mahout.math.jet.random;
 
 import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.jet.random.engine.RandomEngine;
 import org.apache.mahout.math.jet.stat.Probability;
 
 import java.util.Random;
@@ -18,11 +17,11 @@ import java.util.Random;
 @Deprecated
 public class StudentT extends AbstractContinousDistribution {
 
-  private double freedom;
-
-  private double TERM; // performance cache for pdf()
   // The uniform random number generator shared by all <b>static</b> methods.
-  private static final StudentT shared = new StudentT(1.0, RandomUtils.getRandom());
+  private static final StudentT SHARED = new StudentT(1.0, RandomUtils.getRandom());
+
+  private double freedom;
+  private double term; // performance cache for pdf()
 
   /**
    * Constructs a StudentT distribution. Example: freedom=1.0.
@@ -36,6 +35,7 @@ public class StudentT extends AbstractCo
   }
 
   /** Returns the cumulative distribution function. */
+  @Override
   public double cdf(double x) {
     return Probability.studentT(freedom, x);
   }
@@ -76,12 +76,13 @@ public class StudentT extends AbstractCo
     }
     while ((w = u * u + v * v) > 1.0);
 
-    return (u * Math.sqrt(degreesOfFreedom * (Math.exp(-2.0 / degreesOfFreedom * Math.log(w)) - 1.0) / w));
+    return u * Math.sqrt(degreesOfFreedom * (Math.exp(-2.0 / degreesOfFreedom * Math.log(w)) - 1.0) / w);
   }
 
   /** Returns the probability distribution function. */
+  @Override
   public double pdf(double x) {
-    return this.TERM * Math.pow((1 + x * x / freedom), -(freedom + 1) * 0.5);
+    return this.term * Math.pow((1 + x * x / freedom), -(freedom + 1) * 0.5);
   }
 
   /**
@@ -97,7 +98,7 @@ public class StudentT extends AbstractCo
     this.freedom = freedom;
 
     double val = Fun.logGamma((freedom + 1) / 2) - Fun.logGamma(freedom / 2);
-    this.TERM = Math.exp(val) / Math.sqrt(Math.PI * freedom);
+    this.term = Math.exp(val) / Math.sqrt(Math.PI * freedom);
   }
 
   /**
@@ -107,8 +108,8 @@ public class StudentT extends AbstractCo
    * @throws IllegalArgumentException if <tt>freedom &lt;= 0.0</tt>.
    */
   public static double staticNextDouble(double freedom) {
-    synchronized (shared) {
-      return shared.nextDouble(freedom);
+    synchronized (SHARED) {
+      return SHARED.nextDouble(freedom);
     }
   }
 

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/VonMises.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/VonMises.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/VonMises.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/VonMises.java Tue Sep  7 13:54:21 2010
@@ -9,7 +9,6 @@ It is provided "as is" without expressed
 package org.apache.mahout.math.jet.random;
 
 import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.jet.random.engine.RandomEngine;
 
 import java.util.Random;
 
@@ -17,15 +16,15 @@ import java.util.Random;
 @Deprecated
 public class VonMises extends AbstractContinousDistribution {
 
-  private double my_k;
+  // The uniform random number generator shared by all <b>static</b> methods.
+  private static final VonMises SHARED = new VonMises(1.0, RandomUtils.getRandom());
+
+  private double myK;
 
   // cached vars for method nextDouble(a) (for performance only)
-  private double k_set = -1.0;
+  private double kSet = -1.0;
   private double r;
 
-  // The uniform random number generated shared by all <b>static</b> methods.
-  private static final VonMises shared = new VonMises(1.0, RandomUtils.getRandom());
-
   /**
    * Constructs a Von Mises distribution. Example: k=1.0.
    *
@@ -39,7 +38,7 @@ public class VonMises extends AbstractCo
   /** Returns a random number from the distribution. */
   @Override
   public double nextDouble() {
-    return nextDouble(this.my_k);
+    return nextDouble(this.myK);
   }
 
   /**
@@ -71,11 +70,11 @@ public class VonMises extends AbstractCo
       throw new IllegalArgumentException();
     }
 
-    if (k_set != k) {                                               // SET-UP
+    if (kSet != k) {                                               // SET-UP
       double tau = 1.0 + Math.sqrt(1.0 + 4.0 * k * k);
       double rho = (tau - Math.sqrt(2.0 * tau)) / (2.0 * k);
       r = (1.0 + rho * rho) / (2.0 * rho);
-      k_set = k;
+      kSet = k;
     }
 
     // GENERATOR
@@ -103,7 +102,7 @@ public class VonMises extends AbstractCo
     if (k <= 0.0) {
       throw new IllegalArgumentException();
     }
-    this.my_k = k;
+    this.myK = k;
   }
 
   /**
@@ -112,14 +111,14 @@ public class VonMises extends AbstractCo
    * @throws IllegalArgumentException if <tt>k &lt;= 0.0</tt>.
    */
   public static double staticNextDouble(double freedom) {
-    synchronized (shared) {
-      return shared.nextDouble(freedom);
+    synchronized (SHARED) {
+      return SHARED.nextDouble(freedom);
     }
   }
 
   /** Returns a String representation of the receiver. */
   public String toString() {
-    return this.getClass().getName() + '(' + my_k + ')';
+    return this.getClass().getName() + '(' + myK + ')';
   }
 
 }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/Zeta.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/Zeta.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/Zeta.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/Zeta.java Tue Sep  7 13:54:21 2010
@@ -8,27 +8,24 @@ It is provided "as is" without expressed
 */
 package org.apache.mahout.math.jet.random;
 
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.jet.random.engine.RandomEngine;
-
 import java.util.Random;
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
 public class Zeta extends AbstractDiscreteDistribution {
 
+  private static final double MAX_LONG_INT = Long.MAX_VALUE - 1.5;
+  // The uniform random number generator shared by all <b>static</b> methods.
+  //private static final Zeta SHARED = new Zeta(1.0, 1.0, RandomUtils.getRandom());
+
   private double ro;
   private double pk;
 
   // cached values (for performance)
   private double c;
   private double d;
-  private double ro_prev = -1.0;
-  private double pk_prev = -1.0;
-  private static final double maxlongint = Long.MAX_VALUE - 1.5;
-
-  // The uniform random number generated shared by all <b>static</b> methods. 
-  private static final Zeta shared = new Zeta(1.0, 1.0, RandomUtils.getRandom());
+  private double roPrev = -1.0;
+  private double pkPrev = -1.0;
 
   /** Constructs a Zeta distribution. */
   public Zeta(double ro, double pk, Random randomGenerator) {
@@ -57,7 +54,7 @@ public class Zeta extends AbstractDiscre
  * It is not necessary to compute B, because variates x are       *
  * generated by acceptance rejection using density function       *
  * g(x)=ro*(c+0.5)^ro*(c+x)^-(ro+1).                              *
- *                                                                *                                                                *
+ *                                                                *
  * Integer overflow is possible, when ro is small (ro <= .5) and  *
  * pk large. In this case a new sample is generated. If ro and pk *
  * satisfy the inequality   ro > .14 + pk*1.85e-8 + .02*ln(pk)    *
@@ -66,8 +63,8 @@ public class Zeta extends AbstractDiscre
 * NOTE: The comment above is likely to be no longer valid since  *
  * the C-version operated on 32-bit integers, while this Java     *
  * version operates on 64-bit integers. However, the following is *
- * still valid:                                                   *                                                                *
- *                                                                *                                                                *
+ * still valid:                                                   *
+ *                                                                *
  * If either ro > 100  or  k > 10000 numerical problems in        *
  * computing the theoretical moments arise, therefore ro<=100 and *
  * k<=10000 are recommended.                                      *
@@ -82,9 +79,9 @@ public class Zeta extends AbstractDiscre
  *                                                                *
  ******************************************************************/
 
-    if (ro != ro_prev || pk != pk_prev) {                   // Set-up
-      ro_prev = ro;
-      pk_prev = pk;
+    if (ro != roPrev || pk != pkPrev) {                   // Set-up
+      roPrev = ro;
+      pkPrev = pk;
       if (ro < pk) {
         c = pk - 0.5;
         d = 0;
@@ -102,7 +99,7 @@ public class Zeta extends AbstractDiscre
         double u = randomGenerator.nextDouble();
         v = randomGenerator.nextDouble();
         x = (c + 0.5) * Math.exp(-Math.log(u) / ro) - c;
-      } while (x <= 0.5 || x >= maxlongint);
+      } while (x <= 0.5 || x >= MAX_LONG_INT);
 
       k = (int) (x + 0.5);
       e = -Math.log(v);

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/engine/MersenneTwister.java Tue Sep  7 13:54:21 2010
@@ -28,15 +28,24 @@ package org.apache.mahout.math.jet.rando
 
 import java.util.Date;
 /**
- MersenneTwister (MT19937) is one of the strongest uniform pseudo-random number generators known so far; at the same time it is quick.
- Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively,
- as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt> and <tt>(0.0,1.0)</tt>, respectively.
- The seed can be any 32-bit integer except <tt>0</tt>. Shawn J. Cokus commented that perhaps the seed should preferably be odd.
+ MersenneTwister (MT19937) is one of the strongest uniform pseudo-random number generators
+ known so far; at the same time it is quick.
+ Produces uniformly distributed <tt>int</tt>'s and <tt>long</tt>'s in the closed intervals
+ <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt> and <tt>[Long.MIN_VALUE,Long.MAX_VALUE]</tt>, respectively,
+ as well as <tt>float</tt>'s and <tt>double</tt>'s in the open unit intervals <tt>(0.0f,1.0f)</tt>
+ and <tt>(0.0,1.0)</tt>, respectively.
+ The seed can be any 32-bit integer except <tt>0</tt>. Shawn J. Cokus commented that perhaps the
+ seed should preferably be odd.
  <p>
- <b>Quality:</b> MersenneTwister is designed to pass the k-distribution test. It has an astronomically large period of 2<sup>19937</sup>-1 (=10<sup>6001</sup>) and 623-dimensional equidistribution up to 32-bit accuracy.
- It passes many stringent statistical tests, including the <A HREF="http://stat.fsu.edu/~geo/diehard.html">diehard</A> test of G. Marsaglia and the load test of P. Hellekalek and S. Wegenkittl.
+ <b>Quality:</b> MersenneTwister is designed to pass the k-distribution test. It has an
+ astronomically large period of 2<sup>19937</sup>-1 (=10<sup>6001</sup>) and 623-dimensional
+ equidistribution up to 32-bit accuracy.
+ It passes many stringent statistical tests, including the
+ <A HREF="http://stat.fsu.edu/~geo/diehard.html">diehard</A> test of G. Marsaglia
+ and the load test of P. Hellekalek and S. Wegenkittl.
  <p>
- <b>Performance:</b> Its speed is comparable to other modern generators (in particular, as fast as <tt>java.util.Random.nextFloat()</tt>).
+ <b>Performance:</b> Its speed is comparable to other modern generators (in particular,
+ as fast as <tt>java.util.Random.nextFloat()</tt>).
  2.5 million calls to <tt>raw()</tt> per second (Pentium Pro 200 Mhz, JDK 1.2, NT).
  Be aware, however, that there is a non-negligible amount of overhead required to initialize the data
  structures used by a MersenneTwister. Code like
@@ -63,7 +72,8 @@ import java.util.Date;
  ACM Transactions on Modeling and Computer Simulation,
  Vol. 8, No. 1, January 1998, pp 3--30.
 <dt>More info on <A HREF="http://www.math.keio.ac.jp/~matumoto/eindex.html"> Matsumoto's homepage</A>.
- <dt>More info on <A HREF="http://www.ncsa.uiuc.edu/Apps/CMP/RNG/www-rng.html"> Pseudo-random number generators is on the Web</A>.
+ <dt>More info on <A HREF="http://www.ncsa.uiuc.edu/Apps/CMP/RNG/www-rng.html"> Pseudo-random number
+ generators is on the Web</A>.
  <dt>Yet <A HREF="http://nhse.npac.syr.edu/random"> some more info</A>.
  <p>
  The correctness of this implementation has been verified against the published output sequence
@@ -77,9 +87,11 @@ import java.util.Date;
  It is an improved version of TT800, a very successful generator.
  MersenneTwister is based on linear recurrences modulo 2.
  Such generators are very fast, have extremely long periods, and appear quite robust.
- MersenneTwister produces 32-bit numbers, and every <tt>k</tt>-dimensional vector of such numbers appears the same number of times as <tt>k</tt> successive values over the
+ MersenneTwister produces 32-bit numbers, and every <tt>k</tt>-dimensional vector of such
+ numbers appears the same number of times as <tt>k</tt> successive values over the
  period length, for each <tt>k &lt;= 623</tt> (except for the zero vector, which appears one time less).
- If one looks at only the first <tt>n &lt;= 16</tt> bits of each number, then the property holds for even larger <tt>k</tt>, as shown in the following table (taken from the publication cited above):
+ If one looks at only the first <tt>n &lt;= 16</tt> bits of each number, then the property holds
+ for even larger <tt>k</tt>, as shown in the following table (taken from the publication cited above):
  <div align="center">
  <table width="75%" border="1" cellspacing="0" cellpadding="0">
  <tr>
@@ -117,12 +129,16 @@ import java.util.Date;
  </table>
  </div>
  <p>
- MersenneTwister generates random numbers in batches of 624 numbers at a time, so the caching and pipelining of modern systems is exploited.
- The generator is implemented to generate the output by using the fastest arithmetic operations only: 32-bit additions and bit operations (no division, no multiplication, no mod).
+ MersenneTwister generates random numbers in batches of 624 numbers at a time, so
+ the caching and pipelining of modern systems is exploited.
+ The generator is implemented to generate the output by using the fastest arithmetic
+ operations only: 32-bit additions and bit operations (no division, no multiplication, no mod).
  These operations generate sequences of 32 random bits (<tt>int</tt>'s).
  <tt>long</tt>'s are formed by concatenating two 32 bit <tt>int</tt>'s.
- <tt>float</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>32</sup> sub intervals, then randomly choosing one subinterval.
- <tt>double</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>64</sup> sub intervals, then randomly choosing one subinterval.
+ <tt>float</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>32</sup>
+ sub intervals, then randomly choosing one subinterval.
+ <tt>double</tt>'s are formed by dividing the interval <tt>[0.0,1.0]</tt> into 2<sup>64</sup>
+ sub intervals, then randomly choosing one subinterval.
  <p>
  @author wolfgang.hoschek@cern.ch
  @version 1.0, 09/24/99
@@ -130,9 +146,6 @@ import java.util.Date;
    */
 public class MersenneTwister extends RandomEngine {
 
-  private int mti;
-  private int[] mt = new int[N]; /* set initial seeds: N = 624 words */
-
   /* Period parameters */
   private static final int N = 624;
   private static final int M = 397;
@@ -144,13 +157,16 @@ public class MersenneTwister extends Ran
   private static final int TEMPERING_MASK_B = 0x9d2c5680;
   private static final int TEMPERING_MASK_C = 0xefc60000;
 
-  private static final int mag0 = 0x0;
-  private static final int mag1 = MATRIX_A;
+  private static final int MAG0 = 0x0;
+  private static final int MAG1 = MATRIX_A;
   //private static final int[] mag01=new int[] {0x0, MATRIX_A};
   /* mag01[x] = x * MATRIX_A  for x=0,1 */
 
   private static final int DEFAULT_SEED = 4357;
 
+  private int mti;
+  private int[] mt = new int[N]; /* set initial seeds: N = 624 words */  
+
   /**
    * Constructs and returns a random number generator with a default seed, which is a <b>constant</b>. Thus using this
    * constructor will yield generators that always produce exactly the same sequence. This method is mainly intended to
@@ -196,20 +212,21 @@ public class MersenneTwister extends Ran
 
     for (kk = 0; kk < N - M; kk++) {
       y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
-      mt[kk] = mt[kk + M] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? mag0 : mag1);
+      mt[kk] = mt[kk + M] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? MAG0 : MAG1);
     }
     for (; kk < N - 1; kk++) {
       y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK);
-      mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? mag0 : mag1);
+      mt[kk] = mt[kk + (M - N)] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? MAG0 : MAG1);
     }
     y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK);
-    mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? mag0 : mag1);
+    mt[N - 1] = mt[M - 1] ^ (y >>> 1) ^ ((y & 0x1) == 0 ? MAG0 : MAG1);
 
     this.mti = 0;
   }
 
   /**
-   * Returns a 32 bit uniformly distributed random number in the closed interval <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt>
+   * Returns a 32 bit uniformly distributed random number in the closed interval
+   * <tt>[Integer.MIN_VALUE,Integer.MAX_VALUE]</tt>
    * (including <tt>Integer.MIN_VALUE</tt> and <tt>Integer.MAX_VALUE</tt>).
    */
   @Override
@@ -234,12 +251,12 @@ public class MersenneTwister extends Ran
   protected void setSeed(int seed) {
     mt[0] = seed;
     for (int i = 1; i < N; i++) {
-      mt[i] = (1812433253 * (mt[i - 1] ^ (mt[i - 1] >> 30)) + i);
+      mt[i] = 1812433253 * (mt[i - 1] ^ (mt[i - 1] >> 30)) + i;
       /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
       /* In the previous versions, MSBs of the seed affect   */
       /* only MSBs of the array mt[].                        */
       /* 2002/01/09 modified by Makoto Matsumoto             */
-      mt[i] &= 0xffffffff;
+      //mt[i] &= 0xffffffff;
       /* for >32 bit machines */
     }
     //log.info("init done");
@@ -264,7 +281,7 @@ public class MersenneTwister extends Ran
       seed = 69069 * seed + 1;
       mt[i] |= (seed & 0xffff0000) >>> 16;
       seed = 69069 * seed + 1;
-     }
+    }
     //log.info("init done");
     mti = N;
   }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSampler.java Tue Sep  7 13:54:21 2010
@@ -10,13 +10,14 @@ package org.apache.mahout.math.jet.rando
 
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.PersistentObject;
-import org.apache.mahout.math.jet.random.engine.RandomEngine;
 
 import java.util.Random;
 
 /**
- * Space and time efficiently computes a sorted <i>Simple Random Sample Without Replacement (SRSWOR)</i>, that is, a sorted set of <tt>n</tt> random numbers from an interval of <tt>N</tt> numbers;
- * Example: Computing <tt>n=3</tt> random numbers from the interval <tt>[1,50]</tt> may yield the sorted random set <tt>(7,13,47)</tt>.
+ * Space and time efficiently computes a sorted <i>Simple Random Sample Without Replacement
+ * (SRSWOR)</i>, that is, a sorted set of <tt>n</tt> random numbers from an interval of <tt>N</tt> numbers;
+ * Example: Computing <tt>n=3</tt> random numbers from the interval <tt>[1,50]</tt> may yield
+ * the sorted random set <tt>(7,13,47)</tt>.
  * Since we are talking about a set (sampling without replacement), no element will occur more than once.
  * Each number from the <tt>N</tt> numbers has the same probability to be included in the <tt>n</tt> chosen numbers.
  *
@@ -24,20 +25,26 @@ import java.util.Random;
  * Suppose we have a file containing 10^12 objects.
  * We would like to take a truly random subset of 10^6 objects and do something with it, 
  * for example, compute the sum over some instance field, or whatever.
- * How do we choose the subset? In particular, how do we avoid multiple equal elements? How do we do this quick and without consuming excessive memory? How do we avoid slowly jumping back and forth within the file? </i>
+ * How do we choose the subset? In particular, how do we avoid multiple equal elements?
+ * How do we do this quick and without consuming excessive memory?
+ * How do we avoid slowly jumping back and forth within the file? </i>
  *
  * <p><b>Sorted Simple Random Sample Without Replacement (SRSWOR):</b>
  * What are the exact semantics of this class? What is a SRSWOR? In which sense exactly is a returned set "random"?
- * It is random in the sense, that each number from the <tt>N</tt> numbers has the same probability to be included in the <tt>n</tt> chosen numbers.
+ * It is random in the sense, that each number from the <tt>N</tt> numbers has the
+ * same probability to be included in the <tt>n</tt> chosen numbers.
  * For those who think in implementations rather than abstract interfaces:
  * <i>Suppose, we have an empty list.
- * We pick a random number between 1 and 10^12 and add it to the list only if it was not already picked before, i.e. if it is not already contained in the list.
+ * We pick a random number between 1 and 10^12 and add it to the list only if it was not
+ * already picked before, i.e. if it is not already contained in the list.
  * We then do the same thing again and again until we have eventually collected 10^6 distinct numbers.
  * Now we sort the set ascending and return it.</i>
  * <dt>It is exactly in this sense that this class returns "random" sets.
- * <b>Note, however, that the implementation of this class uses a technique orders of magnitudes better (both in time and space) than the one outlined above.</b> 
+ * <b>Note, however, that the implementation of this class uses a technique orders of magnitude
+ * better (both in time and space) than the one outlined above.</b>
  *
- * <p><b>Performance:</b> Space requirements are zero. Running time is <tt>O(n)</tt> on average, <tt>O(N)</tt> in the worst case.
+ * <p><b>Performance:</b> Space requirements are zero. Running time is <tt>O(n)</tt> on average,
+ * <tt>O(N)</tt> in the worst case.
  * <h2 align=center>Performance (200Mhz Pentium Pro, JDK 1.2, NT)</h2>
  * <center>
  *   <table border="1">
@@ -84,24 +91,33 @@ import java.util.Random;
  *   </table>
  * </center>
  *
- * <p><b>Scalability:</b> This random sampler is designed to be scalable. In iterator style, it is able to compute and deliver sorted random sets stepwise in units called <i>blocks</i>.
- * Example: Computing <tt>n=9</tt> random numbers from the interval <tt>[1,50]</tt> in 3 blocks may yield the blocks <tt>(7,13,14), (27,37,42), (45,46,49)</tt>.
- * (The maximum of a block is guaranteed to be less than the minimum of its successor block. Every block is sorted ascending. No element will ever occur twice, both within a block and among blocks.)
+ * <p><b>Scalability:</b> This random sampler is designed to be scalable. In iterator style,
+ * it is able to compute and deliver sorted random sets stepwise in units called <i>blocks</i>.
+ * Example: Computing <tt>n=9</tt> random numbers from the interval <tt>[1,50]</tt> in
+ * 3 blocks may yield the blocks <tt>(7,13,14), (27,37,42), (45,46,49)</tt>.
+ * (The maximum of a block is guaranteed to be less than the minimum of its successor block.
+ * Every block is sorted ascending. No element will ever occur twice, both within a block and among blocks.)
  * A block can be computed and retrieved with method <tt>nextBlock</tt>.
  * Successive calls to method <tt>nextBlock</tt> will deliver as many random numbers as required.
  *
- * <p>Computing and retrieving samples in blocks is useful if you need very many random numbers that cannot be stored in main memory at the same time.
- * For example, if you want to compute 10^10 such numbers you can do this by computing them in blocks of, say, 500 elements each.
+ * <p>Computing and retrieving samples in blocks is useful if you need very many random
+ * numbers that cannot be stored in main memory at the same time.
+ * For example, if you want to compute 10^10 such numbers you can do this by computing
+ * them in blocks of, say, 500 elements each.
  * You then need only space to keep one block of 500 elements (i.e. 4 KB).
- * When you are finished processing the first 500 elements you call <tt>nextBlock</tt> to fill the next 500 elements into the block, process them, and so on.
- * If you have the time and need, by using such blocks you can compute random sets up to <tt>n=10^19</tt> random numbers.
+ * When you are finished processing the first 500 elements you call <tt>nextBlock</tt> to
+ * fill the next 500 elements into the block, process them, and so on.
+ * If you have the time and need, by using such blocks you can compute random sets
+ * up to <tt>n=10^19</tt> random numbers.
  *
  * <p>If you do not need the block feature, you can also directly call 
  * the static methods of this class without needing to construct a <tt>RandomSampler</tt> instance first.
  *
- * <p><b>Random number generation:</b> By default uses <tt>MersenneTwister</tt>, a very strong random number generator, much better than <tt>java.util.Random</tt>.
+ * <p><b>Random number generation:</b> By default uses <tt>MersenneTwister</tt>, a very
+ * strong random number generator, much better than <tt>java.util.Random</tt>.
  * You can also use other strong random number generators of Paul Houle's RngPack package.
- * For example, <tt>Ranecu</tt>, <tt>Ranmar</tt> and <tt>Ranlux</tt> are strong well analyzed research grade pseudo-random number generators with known periods.
+ * For example, <tt>Ranecu</tt>, <tt>Ranmar</tt> and <tt>Ranlux</tt> are strong well
+ * analyzed research grade pseudo-random number generators with known periods.
  *
  * <p><b>Implementation:</b> after J.S. Vitter, An Efficient Algorithm for Sequential Random Sampling,
  * ACM Transactions on Mathematical Software, Vol 13, 1987.
@@ -214,7 +230,8 @@ public class RandomSampler extends Persi
        and then invert the result.
       For example, sampling 99% turns into sampling 1% plus inversion.
 
-      This algorithm is the same as method sampleMethodD(...) with the exception that sampled elements are rejected, and not sampled elements included in the result set.
+      This algorithm is the same as method sampleMethodD(...) with the exception that sampled elements are rejected,
+      and the elements that were not sampled are included in the result set.
     */
     n = N - n; // IMPORTANT !!!
 
@@ -222,7 +239,8 @@ public class RandomSampler extends Persi
     long chosen = -1 + low;
 
     //long negalphainv =
-    //    -13;  //tuning paramter, determines when to switch from method D to method A. Dependent on programming language, platform, etc.
+    //    -13;  //tuning parameter, determines when to switch from method D to method A. Dependent on programming
+    // language, platform, etc.
 
     double nreal = n;
     double ninv = 1.0 / nreal;
@@ -462,11 +480,12 @@ public class RandomSampler extends Persi
     double nreal = n;
     double ninv = 1.0 / nreal;
     double Nreal = N;
-    double Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+    double vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
     long qu1 = -n + 1 + N;
     double qu1real = -nreal + 1.0 + Nreal;
-    long negalphainv =
-        -13;  //tuning paramter, determines when to switch from method D to method A. Dependent on programming language, platform, etc.
+    long negalphainv = -13;
+    //tuning parameter, determines when to switch from method D to method A. Dependent on programming
+    // language, platform, etc.
     long threshold = -negalphainv * n;
 
     long S;
@@ -476,20 +495,20 @@ public class RandomSampler extends Persi
       while (true) {
         double X;
         while (true) { // step D2: generate U and X
-          X = Nreal * (-Vprime + 1.0);
+          X = Nreal * (-vprime + 1.0);
           S = (long) X;
           if (S < qu1) {
             break;
           }
-          Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+          vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
         }
         double U = randomGenerator.nextDouble();
         negSreal = -S;
 
         //step D3: Accept?
         double y1 = Math.exp(Math.log(U * Nreal / qu1real) * nmin1inv);
-        Vprime = y1 * (-X / Nreal + 1.0) * (qu1real / (negSreal + qu1real));
-        if (Vprime <= 1.0) {
+        vprime = y1 * (-X / Nreal + 1.0) * (qu1real / (negSreal + qu1real));
+        if (vprime <= 1.0) {
           break;
         } //break inner loop
 
@@ -512,10 +531,10 @@ public class RandomSampler extends Persi
         }
         if (Nreal / (-X + Nreal) >= y1 * Math.exp(Math.log(y2) * nmin1inv)) {
           // accept !
-          Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * nmin1inv);
+          vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * nmin1inv);
           break; //break inner loop
         }
-        Vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
+        vprime = Math.exp(Math.log(randomGenerator.nextDouble()) * ninv);
       }
 
       //step D5: select the (S+1)st record !
@@ -547,7 +566,7 @@ public class RandomSampler extends Persi
         sampleMethodA(n, N, count, chosen + 1, values, fromIndex, randomGenerator);
       } else {
         //special case n==1
-        S = (long) (N * Vprime);
+        S = (long) (N * vprime);
         chosen += S + 1;
         values[fromIndex++] = chosen;
       }

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/RandomSamplingAssistant.java Tue Sep  7 13:54:21 2010
@@ -9,13 +9,15 @@ It is provided "as is" without expressed
 package org.apache.mahout.math.jet.random.sampling;
 
 import org.apache.mahout.math.PersistentObject;
-import org.apache.mahout.math.jet.random.engine.RandomEngine;
 
 import java.util.Random;
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
 public class RandomSamplingAssistant extends PersistentObject {
+
+  private static final int MAX_BUFFER_SIZE = 200;
+
   //public class RandomSamplingAssistant extends Object implements java.io.Serializable {
   private RandomSampler sampler;
   private final long[] buffer;
@@ -24,8 +26,6 @@ public class RandomSamplingAssistant ext
   private long skip;
   private long n;
 
-  private static final int MAX_BUFFER_SIZE = 200;
-
   /**
    * Constructs a random sampler that samples <tt>n</tt> random elements from an input sequence of <tt>N</tt> elements.
    *

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/random/sampling/WeightedRandomSampler.java Tue Sep  7 13:54:21 2010
@@ -11,7 +11,6 @@ package org.apache.mahout.math.jet.rando
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.PersistentObject;
 import org.apache.mahout.math.jet.random.Uniform;
-import org.apache.mahout.math.jet.random.engine.RandomEngine;
 import org.apache.mahout.math.list.IntArrayList;
 
 import java.util.Random;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Descriptive.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Descriptive.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Descriptive.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Descriptive.java Tue Sep  7 13:54:21 2010
@@ -13,7 +13,7 @@ import org.apache.mahout.math.list.IntAr
 
 /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
 @Deprecated
-public class Descriptive {
+public final class Descriptive {
 
   /** Makes this class non instantiable, but still let's others inherit from it. */
   private Descriptive() {
@@ -21,18 +21,18 @@ public class Descriptive {
 
   /** Returns the auto-correlation of a data sequence. */
   public static double autoCorrelation(DoubleArrayList data, int lag, double mean, double variance) {
-    int N = data.size();
-    if (lag >= N) {
+    int n = data.size();
+    if (lag >= n) {
       throw new IllegalArgumentException("Lag is too large");
     }
 
     double[] elements = data.elements();
     double run = 0;
-    for (int i = lag; i < N; ++i) {
+    for (int i = lag; i < n; ++i) {
       run += (elements[i] - mean) * (elements[i - lag] - mean);
     }
 
-    return (run / (N - lag)) / variance;
+    return (run / (n - lag)) / variance;
   }
 
   /**
@@ -40,7 +40,7 @@ public class Descriptive {
    *
    * @throws IndexOutOfBoundsException if <tt>to!=from-1 || from&lt;0 || from&gt;to || to&gt;=size()</tt>.
    */
-  protected static void checkRangeFromTo(int from, int to, int theSize) {
+  static void checkRangeFromTo(int from, int to, int theSize) {
     if (to == from - 1) {
       return;
     }
@@ -49,7 +49,10 @@ public class Descriptive {
     }
   }
 
-  /** Returns the correlation of two data sequences. That is <tt>covariance(data1,data2)/(standardDev1*standardDev2)</tt>. */
+  /**
+   * Returns the correlation of two data sequences.
+   * That is <tt>covariance(data1,data2)/(standardDev1*standardDev2)</tt>.
+   */
   public static double correlation(DoubleArrayList data1, double standardDev1, DoubleArrayList data2,
                                    double standardDev2) {
     return covariance(data1, data2) / (standardDev1 * standardDev2);
@@ -70,16 +73,16 @@ public class Descriptive {
 
     double sumx = elements1[0];
     double sumy = elements2[0];
-    double Sxy = 0;
+    double sxy = 0;
     for (int i = 1; i < size; ++i) {
       double x = elements1[i];
       double y = elements2[i];
       sumx += x;
-      Sxy += (x - sumx / (i + 1)) * (y - sumy / i);
+      sxy += (x - sumx / (i + 1)) * (y - sumy / i);
       sumy += y;
       // Exercise for the reader: Why does this give us the right answer?
     }
-    return Sxy / (size - 1);
+    return sxy / (size - 1);
   }
 
   /*
@@ -110,15 +113,15 @@ public class Descriptive {
     }
 
     double[] elements = data.elements();
-    double run_sq = elements[0] * elements[0];
+    double runSq = elements[0] * elements[0];
     double run = 0;
     for (int i = 1; i < size; ++i) {
       double x = elements[i] - elements[i - 1];
       run += x * x;
-      run_sq += elements[i] * elements[i];
+      runSq += elements[i] * elements[i];
     }
 
-    return run / run_sq;
+    return run / runSq;
   }
 
   /**
@@ -152,8 +155,9 @@ public class Descriptive {
       int cursor = i;
 
       // determine run length (number of equal elements)
-      while (++i < size && sortedElements[i] == element) {
-      }
+      do {
+        i++;
+      } while (i < size && sortedElements[i] == element);
 
       int runLength = i - cursor;
       distinctValues.add(element);
@@ -285,7 +289,8 @@ public class Descriptive {
    *                    <tt>data[from], ..., data[to]</tt>.
    * @param sumOfPowers the old values of the sums in the following format: <ul> <li><tt>sumOfPowers[0]</tt> is the old
    *                    <tt>Sum(data[i]<sup>fromSumIndex</sup>)</tt>. <li><tt>sumOfPowers[1]</tt> is the old
-   *                    <tt>Sum(data[i]<sup>fromSumIndex+1</sup>)</tt>. <li>... <li><tt>sumOfPowers[toSumIndex-fromSumIndex]</tt>
+   *                    <tt>Sum(data[i]<sup>fromSumIndex+1</sup>)</tt>. <li>...
+   *                    <li><tt>sumOfPowers[toSumIndex-fromSumIndex]</tt>
    *                    is the old <tt>Sum(data[i]<sup>toSumIndex</sup>)</tt>. </ul> If no data sequence elements have
    *                    so far been recorded set all old values of the sums to <tt>0.0</tt>.
    */
@@ -317,38 +322,38 @@ public class Descriptive {
         double[] elements = data.elements();
         double sum = sumOfPowers[0];
         double sumSquares = sumOfPowers[1];
-        double sum_xxx = sumOfPowers[2];
+        double sumXxx = sumOfPowers[2];
         for (int i = from - 1; ++i <= to;) {
           double element = elements[i];
           sum += element;
           sumSquares += element * element;
-          sum_xxx += element * element * element;
+          sumXxx += element * element * element;
           //if (element < min) min = element;
           //else if (element > max) max = element;
         }
         sumOfPowers[0] += sum;
         sumOfPowers[1] += sumSquares;
-        sumOfPowers[2] += sum_xxx;
+        sumOfPowers[2] += sumXxx;
         return;
       } else if (toSumIndex == 4) { // handle quicker
         double[] elements = data.elements();
         double sum = sumOfPowers[0];
         double sumSquares = sumOfPowers[1];
-        double sum_xxx = sumOfPowers[2];
-        double sum_xxxx = sumOfPowers[3];
+        double sumXxx = sumOfPowers[2];
+        double sumXxxx = sumOfPowers[3];
         for (int i = from - 1; ++i <= to;) {
           double element = elements[i];
           sum += element;
           sumSquares += element * element;
-          sum_xxx += element * element * element;
-          sum_xxxx += element * element * element * element;
+          sumXxx += element * element * element;
+          sumXxxx += element * element * element * element;
           //if (element < min) min = element;
           //else if (element > max) max = element;
         }
         sumOfPowers[0] += sum;
         sumOfPowers[1] += sumSquares;
-        sumOfPowers[2] += sum_xxx;
-        sumOfPowers[3] += sum_xxxx;
+        sumOfPowers[2] += sumXxx;
+        sumOfPowers[3] += sumXxxx;
         return;
       }
     }
@@ -463,8 +468,8 @@ public class Descriptive {
     double v = (elements[0] - mean) * (elements[0] - mean);
 
     for (int i = 1; i < size; i++) {
-      double delta0 = (elements[i - 1] - mean);
-      double delta1 = (elements[i] - mean);
+      double delta0 = elements[i - 1] - mean;
+      double delta1 = elements[i] - mean;
       q += (delta0 * delta1 - q) / (i + 1);
       v += (delta1 * delta1 - v) / (i + 1);
     }
@@ -619,7 +624,8 @@ public class Descriptive {
   }
 
   /**
-   * Returns the product, which is <tt>Prod( data[i] )</tt>. In other words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>.
+   * Returns the product, which is <tt>Prod( data[i] )</tt>.
+   * In other words: <tt>data[0]*data[1]*...*data[data.size()-1]</tt>.
    * This method uses the equivalent definition: <tt>prod = pow( exp( Sum( Log(x[i]) ) / size(), size())</tt>.
    */
   public static double product(int size, double sumOfLogarithms) {
@@ -830,13 +836,13 @@ public class Descriptive {
     // the unbiased standard deviation.
     double s = Math.sqrt(sampleVariance);
     // It needs to be multiplied by this correction factor.
-    double Cn;
+    double cn;
     if (n > 30) {
-      Cn = 1 + 1.0 / (4 * (n - 1)); // Cn = 1+1/(4*(n-1));
+      cn = 1 + 1.0 / (4 * (n - 1)); // Cn = 1+1/(4*(n-1));
     } else {
-      Cn = Math.sqrt((n - 1) * 0.5) * Gamma.gamma((n - 1) * 0.5) / Gamma.gamma(n * 0.5);
+      cn = Math.sqrt((n - 1) * 0.5) * Gamma.gamma((n - 1) * 0.5) / Gamma.gamma(n * 0.5);
     }
-    return Cn * s;
+    return cn * s;
   }
 
   /**
@@ -927,7 +933,8 @@ public class Descriptive {
         bins[i].addAllOfFromTo(sortedList, nextStart, insertionPosition - 1);
         nextStart = insertionPosition;
       } else { // splitValue found
-        // For multiple identical elements ("runs"), binarySearch does not define which of all valid indexes is returned.
+        // For multiple identical elements ("runs"),
+        // binarySearch does not define which of all valid indexes is returned.
         // Thus, skip over to the first element of a run.
         do {
           index--;
@@ -1148,21 +1155,21 @@ public class Descriptive {
    * @param mean       the mean of the (full) sorted data sequence.
    */
   public static double trimmedMean(DoubleArrayList sortedData, double mean, int left, int right) {
-    int N = sortedData.size();
-    if (N == 0) {
+    int n = sortedData.size();
+    if (n == 0) {
       throw new IllegalArgumentException("Empty data.");
     }
-    if (left + right >= N) {
+    if (left + right >= n) {
       throw new IllegalArgumentException("Not enough data.");
     }
 
     double[] sortedElements = sortedData.elements();
-    int N0 = N;
+    int n0 = n;
     for (int i = 0; i < left; ++i) {
-      mean += (mean - sortedElements[i]) / (--N);
+      mean += (mean - sortedElements[i]) / (--n);
     }
     for (int i = 0; i < right; ++i) {
-      mean += (mean - sortedElements[N0 - 1 - i]) / (--N);
+      mean += (mean - sortedElements[n0 - 1 - i]) / (--n);
     }
     return mean;
   }
@@ -1185,7 +1192,9 @@ public class Descriptive {
     return (sumOfSquares - mean * sum) / size;
   }
 
-  /** Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] * weights[i]) / Sum ( weights[i] )</tt>. */
+  /**
+   * Returns the weighted mean of a data sequence. That is <tt> Sum (data[i] * weights[i]) / Sum ( weights[i] )</tt>. 
+   */
   public static double weightedMean(DoubleArrayList data, DoubleArrayList weights) {
     int size = data.size();
     if (size != weights.size() || size == 0) {
@@ -1223,11 +1232,11 @@ public class Descriptive {
    * @param mean       the mean of the (full) sorted data sequence.
    */
   public static double winsorizedMean(DoubleArrayList sortedData, double mean, int left, int right) {
-    int N = sortedData.size();
-    if (N == 0) {
+    int n = sortedData.size();
+    if (n == 0) {
       throw new IllegalArgumentException("Empty data.");
     }
-    if (left + right >= N) {
+    if (left + right >= n) {
       throw new IllegalArgumentException("Not enough data.");
     }
 
@@ -1235,12 +1244,12 @@ public class Descriptive {
 
     double leftElement = sortedElements[left];
     for (int i = 0; i < left; ++i) {
-      mean += (leftElement - sortedElements[i]) / N;
+      mean += (leftElement - sortedElements[i]) / n;
     }
 
-    double rightElement = sortedElements[N - 1 - right];
+    double rightElement = sortedElements[n - 1 - right];
     for (int i = 0; i < right; ++i) {
-      mean += (rightElement - sortedElements[N - 1 - i]) / N;
+      mean += (rightElement - sortedElements[n - 1 - i]) / n;
     }
 
     return mean;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory1D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory1D.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory1D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory1D.java Tue Sep  7 13:54:21 2010
@@ -11,7 +11,6 @@ package org.apache.mahout.math.matrix;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.PersistentObject;
 import org.apache.mahout.math.function.Functions;
-import org.apache.mahout.math.jet.random.engine.MersenneTwister;
 import org.apache.mahout.math.jet.random.sampling.RandomSamplingAssistant;
 import org.apache.mahout.math.list.AbstractDoubleList;
 import org.apache.mahout.math.list.DoubleArrayList;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/DoubleFactory2D.java Tue Sep  7 13:54:21 2010
@@ -11,7 +11,6 @@ package org.apache.mahout.math.matrix;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.PersistentObject;
 import org.apache.mahout.math.function.Functions;
-import org.apache.mahout.math.jet.random.engine.MersenneTwister;
 import org.apache.mahout.math.jet.random.sampling.RandomSamplingAssistant;
 import org.apache.mahout.math.matrix.impl.DenseDoubleMatrix2D;
 import org.apache.mahout.math.matrix.impl.SparseDoubleMatrix2D;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/Algebra.java Tue Sep  7 13:54:21 2010
@@ -11,8 +11,6 @@ package org.apache.mahout.math.matrix.li
 import org.apache.mahout.math.GenericPermuting;
 import org.apache.mahout.math.PersistentObject;
 import org.apache.mahout.math.Swapper;
-import org.apache.mahout.math.function.BinaryFunction;
-import org.apache.mahout.math.matrix.DoubleFactory2D;
 import org.apache.mahout.math.matrix.DoubleMatrix1D;
 import org.apache.mahout.math.matrix.DoubleMatrix2D;
 

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java (original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/matrix/linalg/LUDecompositionQuick.java Tue Sep  7 13:54:21 2010
@@ -19,7 +19,7 @@ import org.apache.mahout.math.matrix.Dou
 public class LUDecompositionQuick implements java.io.Serializable {
 
   /** Array for internal storage of decomposition. */
-  private DoubleMatrix2D LU;
+  private DoubleMatrix2D lu;
 
   /** pivot sign. */
   private int pivsign;
@@ -55,7 +55,7 @@ public class LUDecompositionQuick implem
    */
   public void decompose(DoubleMatrix2D A) {
     // setup
-    LU = A;
+    lu = A;
     int m = A.rows();
     int n = A.columns();
 
@@ -69,46 +69,46 @@ public class LUDecompositionQuick implem
     pivsign = 1;
 
     if (m * n == 0) {
-      setLU(LU);
+      setLU(lu);
       return; // nothing to do
     }
 
     //precompute and cache some views to avoid regenerating them time and again
-    DoubleMatrix1D[] LUrows = new DoubleMatrix1D[m];
+    DoubleMatrix1D[] luRows = new DoubleMatrix1D[m];
     for (int i = 0; i < m; i++) {
-      LUrows[i] = LU.viewRow(i);
+      luRows[i] = lu.viewRow(i);
     }
 
     IntArrayList nonZeroIndexes =
         new IntArrayList(); // sparsity
-    DoubleMatrix1D LUcolj = LU.viewColumn(0).like();  // blocked column j
+    DoubleMatrix1D luColj = lu.viewColumn(0).like();  // blocked column j
     Mult multFunction = Mult.mult(0);
 
     // Outer loop.
-    int CUT_OFF = 10;
+    int cutOff = 10;
     for (int j = 0; j < n; j++) {
       // blocking (make copy of j-th column to localize references)
-      LUcolj.assign(LU.viewColumn(j));
+      luColj.assign(lu.viewColumn(j));
 
       // sparsity detection
-      int maxCardinality = m / CUT_OFF; // == heuristic depending on speedup
-      LUcolj.getNonZeros(nonZeroIndexes, null, maxCardinality);
+      int maxCardinality = m / cutOff; // == heuristic depending on speedup
+      luColj.getNonZeros(nonZeroIndexes, null, maxCardinality);
       int cardinality = nonZeroIndexes.size();
-      boolean sparse = (cardinality < maxCardinality);
+      boolean sparse = cardinality < maxCardinality;
 
       // Apply previous transformations.
       for (int i = 0; i < m; i++) {
         int kmax = Math.min(i, j);
         double s;
         if (sparse) {
-          s = LUrows[i].zDotProduct(LUcolj, 0, kmax, nonZeroIndexes);
+          s = luRows[i].zDotProduct(luColj, 0, kmax, nonZeroIndexes);
         } else {
-          s = LUrows[i].zDotProduct(LUcolj, 0, kmax);
+          s = luRows[i].zDotProduct(luColj, 0, kmax);
         }
-        double before = LUcolj.getQuick(i);
+        double before = luColj.getQuick(i);
         double after = before - s;
-        LUcolj.setQuick(i, after); // LUcolj is a copy
-        LU.setQuick(i, j, after);   // this is the original
+        luColj.setQuick(i, after); // LUcolj is a copy
+        lu.setQuick(i, j, after);   // this is the original
         if (sparse) {
           if (before == 0 && after != 0) { // nasty bug fixed!
             int pos = nonZeroIndexes.binarySearch(i);
@@ -124,9 +124,9 @@ public class LUDecompositionQuick implem
       // Find pivot and exchange if necessary.
       int p = j;
       if (p < m) {
-        double max = Math.abs(LUcolj.getQuick(p));
+        double max = Math.abs(luColj.getQuick(p));
         for (int i = j + 1; i < m; i++) {
-          double v = Math.abs(LUcolj.getQuick(i));
+          double v = Math.abs(luColj.getQuick(i));
           if (v > max) {
             p = i;
             max = v;
@@ -134,7 +134,7 @@ public class LUDecompositionQuick implem
         }
       }
       if (p != j) {
-        LUrows[p].swap(LUrows[j]);
+        luRows[p].swap(luRows[j]);
         int k = piv[p];
         piv[p] = piv[j];
         piv[j] = k;
@@ -143,13 +143,13 @@ public class LUDecompositionQuick implem
 
       // Compute multipliers.
       double jj;
-      if (j < m && (jj = LU.getQuick(j, j)) != 0.0) {
+      if (j < m && (jj = lu.getQuick(j, j)) != 0.0) {
         multFunction.setMultiplicator(1 / jj);
-        LU.viewColumn(j).viewPart(j + 1, m - (j + 1)).assign(multFunction);
+        lu.viewColumn(j).viewPart(j + 1, m - (j + 1)).assign(multFunction);
       }
 
     }
-    setLU(LU);
+    setLU(lu);
   }
 
   /**
@@ -166,7 +166,7 @@ public class LUDecompositionQuick implem
       return;
     }
     // setup
-    LU = A;
+    lu = A;
     int m = A.rows();
     int n = A.columns();
 
@@ -220,7 +220,7 @@ public class LUDecompositionQuick implem
 
     double det = (double) pivsign;
     for (int j = 0; j < n; j++) {
-      det *= LU.getQuick(j, j);
+      det *= lu.getQuick(j, j);
     }
     return det;
   }
@@ -245,7 +245,7 @@ public class LUDecompositionQuick implem
    * @return <tt>L</tt>
    */
   public DoubleMatrix2D getL() {
-    return lowerTriangular(LU.copy());
+    return lowerTriangular(lu.copy());
   }
 
   /**
@@ -254,7 +254,7 @@ public class LUDecompositionQuick implem
    * @return <tt>LU</tt>
    */
   public DoubleMatrix2D getLU() {
-    return LU.copy();
+    return lu.copy();
   }
 
   /**
@@ -272,7 +272,7 @@ public class LUDecompositionQuick implem
    * @return <tt>U</tt>
    */
   public DoubleMatrix2D getU() {
-    return upperTriangular(LU.copy());
+    return upperTriangular(lu.copy());
   }
 
   /**
@@ -338,11 +338,11 @@ public class LUDecompositionQuick implem
   }
 
   protected int m() {
-    return LU.rows();
+    return lu.rows();
   }
 
   protected int n() {
-    return LU.columns();
+    return lu.columns();
   }
 
   /**
@@ -350,7 +350,7 @@ public class LUDecompositionQuick implem
    * indeed a proper LU decomposition.
    */
   public void setLU(DoubleMatrix2D LU) {
-    this.LU = LU;
+    this.lu = LU;
     this.isNonSingular = isNonsingular(LU);
   }
 
@@ -364,7 +364,7 @@ public class LUDecompositionQuick implem
    * @throws IllegalArgumentException if <tt>A.rows() < A.columns()</tt>.
    */
   public void solve(DoubleMatrix1D B) {
-    Property.checkRectangular(LU);
+    Property.checkRectangular(lu);
     int m = m();
     int n = n();
     if (B.size() != m) {
@@ -392,7 +392,7 @@ public class LUDecompositionQuick implem
       if (f != 0) {
         for (int i = k + 1; i < n; i++) {
           // B[i] -= B[k]*LU[i][k];
-          double v = LU.getQuick(i, k);
+          double v = lu.getQuick(i, k);
           if (v != 0) {
             B.setQuick(i, B.getQuick(i) - f * v);
           }
@@ -403,12 +403,12 @@ public class LUDecompositionQuick implem
     // Solve U*B = Y;
     for (int k = n - 1; k >= 0; k--) {
       // B[k] /= LU[k,k]
-      B.setQuick(k, B.getQuick(k) / LU.getQuick(k, k));
+      B.setQuick(k, B.getQuick(k) / lu.getQuick(k, k));
       double f = B.getQuick(k);
       if (f != 0) {
         for (int i = 0; i < k; i++) {
           // B[i] -= B[k]*LU[i][k];
-          double v = LU.getQuick(i, k);
+          double v = lu.getQuick(i, k);
           if (v != 0) {
             B.setQuick(i, B.getQuick(i) - f * v);
           }
@@ -427,7 +427,7 @@ public class LUDecompositionQuick implem
    * @throws IllegalArgumentException if <tt>A.rows() < A.columns()</tt>.
    */
   public void solve(DoubleMatrix2D B) {
-    Property.checkRectangular(LU);
+    Property.checkRectangular(lu);
     int m = m();
     int n = n();
     if (B.rows() != m) {
@@ -452,9 +452,9 @@ public class LUDecompositionQuick implem
     int nx = B.columns();
 
     //precompute and cache some views to avoid regenerating them time and again
-    DoubleMatrix1D[] Brows = new DoubleMatrix1D[n];
+    DoubleMatrix1D[] brows = new DoubleMatrix1D[n];
     for (int k = 0; k < n; k++) {
-      Brows[k] = B.viewRow(k);
+      brows[k] = B.viewRow(k);
     }
 
     // transformations
@@ -463,30 +463,30 @@ public class LUDecompositionQuick implem
 
     IntArrayList nonZeroIndexes =
         new IntArrayList(); // sparsity
-    DoubleMatrix1D Browk = org.apache.mahout.math.matrix.DoubleFactory1D.dense.make(nx); // blocked row k
+    DoubleMatrix1D bRowk = org.apache.mahout.math.matrix.DoubleFactory1D.dense.make(nx); // blocked row k
 
     // Solve L*Y = B(piv,:)
-    int CUT_OFF = 10;
+    int cutOff = 10;
     for (int k = 0; k < n; k++) {
       // blocking (make copy of k-th row to localize references)
-      Browk.assign(Brows[k]);
+      bRowk.assign(brows[k]);
 
       // sparsity detection
-      int maxCardinality = nx / CUT_OFF; // == heuristic depending on speedup
-      Browk.getNonZeros(nonZeroIndexes, null, maxCardinality);
+      int maxCardinality = nx / cutOff; // == heuristic depending on speedup
+      bRowk.getNonZeros(nonZeroIndexes, null, maxCardinality);
       int cardinality = nonZeroIndexes.size();
-      boolean sparse = (cardinality < maxCardinality);
+      boolean sparse = cardinality < maxCardinality;
 
       for (int i = k + 1; i < n; i++) {
         //for (int j = 0; j < nx; j++) B[i][j] -= B[k][j]*LU[i][k];
         //for (int j = 0; j < nx; j++) B.set(i,j, B.get(i,j) - B.get(k,j)*LU.get(i,k));
 
-        minusMult.setMultiplicator(-LU.getQuick(i, k));
+        minusMult.setMultiplicator(-lu.getQuick(i, k));
         if (minusMult.getMultiplicator() != 0) {
           if (sparse) {
-            Brows[i].assign(Browk, minusMult, nonZeroIndexes);
+            brows[i].assign(bRowk, minusMult, nonZeroIndexes);
           } else {
-            Brows[i].assign(Browk, minusMult);
+            brows[i].assign(bRowk, minusMult);
           }
         }
       }
@@ -496,20 +496,20 @@ public class LUDecompositionQuick implem
     for (int k = n - 1; k >= 0; k--) {
       // for (int j = 0; j < nx; j++) B[k][j] /= LU[k][k];
       // for (int j = 0; j < nx; j++) B.set(k,j, B.get(k,j) / LU.get(k,k));
-      div.setMultiplicator(1 / LU.getQuick(k, k));
-      Brows[k].assign(div);
+      div.setMultiplicator(1 / lu.getQuick(k, k));
+      brows[k].assign(div);
 
       // blocking
-      if (Browk == null) {
-        Browk = org.apache.mahout.math.matrix.DoubleFactory1D.dense.make(B.columns());
-      }
-      Browk.assign(Brows[k]);
+      //if (bRowk == null) {
+      //  bRowk = org.apache.mahout.math.matrix.DoubleFactory1D.dense.make(B.columns());
+      //}
+      bRowk.assign(brows[k]);
 
       // sparsity detection
-      int maxCardinality = nx / CUT_OFF; // == heuristic depending on speedup
-      Browk.getNonZeros(nonZeroIndexes, null, maxCardinality);
+      int maxCardinality = nx / cutOff; // == heuristic depending on speedup
+      bRowk.getNonZeros(nonZeroIndexes, null, maxCardinality);
       int cardinality = nonZeroIndexes.size();
-      boolean sparse = (cardinality < maxCardinality);
+      boolean sparse = cardinality < maxCardinality;
 
       //Browk.getNonZeros(nonZeroIndexes,null);
       //boolean sparse = nonZeroIndexes.size() < nx/10;
@@ -518,12 +518,12 @@ public class LUDecompositionQuick implem
         // for (int j = 0; j < nx; j++) B[i][j] -= B[k][j]*LU[i][k];
         // for (int j = 0; j < nx; j++) B.set(i,j, B.get(i,j) - B.get(k,j)*LU.get(i,k));
 
-        minusMult.setMultiplicator(-LU.getQuick(i, k));
+        minusMult.setMultiplicator(-lu.getQuick(i, k));
         if (minusMult.getMultiplicator() != 0) {
           if (sparse) {
-            Brows[i].assign(Browk, minusMult, nonZeroIndexes);
+            brows[i].assign(bRowk, minusMult, nonZeroIndexes);
           } else {
-            Brows[i].assign(Browk, minusMult);
+            brows[i].assign(bRowk, minusMult);
           }
         }
       }
@@ -646,7 +646,7 @@ public class LUDecompositionQuick implem
     }
 
     buf.append("\n\ninverse(A) = ");
-    DoubleMatrix2D identity = org.apache.mahout.math.matrix.DoubleFactory2D.dense.identity(LU.rows());
+    DoubleMatrix2D identity = org.apache.mahout.math.matrix.DoubleFactory2D.dense.identity(lu.rows());
     try {
       this.solve(identity);
       buf.append(String.valueOf(identity));

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/TestSingularValueDecomposition.java Tue Sep  7 13:54:21 2010
@@ -239,7 +239,7 @@ public final class TestSingularValueDeco
     
     for (int i = 0; i < size; ++i) {
       double[] dataI = data[i];
-      double norm2 = 0;
+      double norm2;
       do {
         
         // generate randomly row I

Modified: mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NegativeBinomialTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NegativeBinomialTest.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NegativeBinomialTest.java (original)
+++ mahout/trunk/math/src/test/java/org/apache/mahout/math/jet/random/NegativeBinomialTest.java Tue Sep  7 13:54:21 2010
@@ -28,12 +28,11 @@ import org.apache.mahout.math.MahoutTest
 import org.junit.Test;
 
 import java.io.InputStreamReader;
-import java.util.Locale;
 
 public final class NegativeBinomialTest extends MahoutTestCase {
 
   private static final Splitter onComma = Splitter.on(",").trimResults();
-  private static final int N = 10000;
+  //private static final int N = 10000;
 
   @Test
   public void testDistributionFunctions() throws Exception {

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwDriver.java Tue Sep  7 13:54:21 2010
@@ -76,7 +76,7 @@ public final class CDbwDriver extends Ab
     int numReducers = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
     int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION));
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-    DistanceMeasure measure = (DistanceMeasure) ((Class<?>) ccl.loadClass(distanceMeasureClass)).newInstance();
+    DistanceMeasure measure = ccl.loadClass(distanceMeasureClass).asSubclass(DistanceMeasure.class).newInstance();
 
     job(input, null, output, measure, maxIterations, numReducers);
     return 0;
@@ -147,8 +147,8 @@ public final class CDbwDriver extends Ab
       if (!part.getPath().getName().startsWith(".")) {
         Path inPart = part.getPath();
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, inPart, conf);
-        Writable key = (Writable) reader.getKeyClass().newInstance();
-        Writable value = (Writable) reader.getValueClass().newInstance();
+        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
         Path path = new Path(output, inPart.getName());
         SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, IntWritable.class, VectorWritable.class);
         while (reader.next(key, value)) {

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Tue Sep  7 13:54:21 2010
@@ -76,8 +76,8 @@ public class CDbwEvaluator {
   public CDbwEvaluator(Configuration conf, Path clustersIn)
       throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException {
     ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-    Class<?> cl = ccl.loadClass(conf.get(CDbwDriver.DISTANCE_MEASURE_KEY));
-    measure = (DistanceMeasure) cl.newInstance();
+    measure = ccl.loadClass(conf.get(CDbwDriver.DISTANCE_MEASURE_KEY))
+        .asSubclass(DistanceMeasure.class).newInstance();
     representativePoints = CDbwMapper.getRepresentativePoints(conf);
     clusters = loadClusters(conf, clustersIn);
     for (Integer cId : representativePoints.keySet()) {
@@ -200,12 +200,12 @@ public class CDbwEvaluator {
       if (!part.getPath().getName().startsWith(".")) {
         Path inPart = part.getPath();
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, inPart, conf);
-        Writable key = (Writable) reader.getKeyClass().newInstance();
-        Writable value = (Writable) reader.getValueClass().newInstance();
+        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
         while (reader.next(key, value)) {
           Cluster cluster = (Cluster) value;
           clusters.put(cluster.getId(), cluster);
-          value = (Writable) reader.getValueClass().newInstance();
+          value = reader.getValueClass().asSubclass(Writable.class).newInstance();
         }
         reader.close();
       }

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwMapper.java Tue Sep  7 13:54:21 2010
@@ -73,8 +73,8 @@ public class CDbwMapper extends Mapper<I
     Configuration conf = context.getConfiguration();
     try {
       ClassLoader ccl = Thread.currentThread().getContextClassLoader();
-      Class<?> cl = ccl.loadClass(conf.get(CDbwDriver.DISTANCE_MEASURE_KEY));
-      measure = (DistanceMeasure) cl.newInstance();
+      measure = ccl.loadClass(conf.get(CDbwDriver.DISTANCE_MEASURE_KEY))
+          .asSubclass(DistanceMeasure.class).newInstance();
       representativePoints = getRepresentativePoints(conf);
     } catch (NumberFormatException e) {
       throw new IllegalStateException(e);

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/lda/LDAPrintTopics.java Tue Sep  7 13:54:21 2010
@@ -144,9 +144,9 @@ public final class LDAPrintTopics {
       }
       
       List<String> wordList;
-      if (dictionaryType.equals("text")) {
+      if ("text".equals(dictionaryType)) {
         wordList = Arrays.asList(VectorHelper.loadTermDictionary(new File(dictFile)));
-      } else if (dictionaryType.equals("sequencefile")) {
+      } else if ("sequencefile".equals(dictionaryType)) {
         FileSystem fs = FileSystem.get(new Path(dictFile).toUri(), config);
         wordList = Arrays.asList(VectorHelper.loadTermDictionary(config, fs, dictFile));
       } else {
@@ -157,10 +157,8 @@ public final class LDAPrintTopics {
       
       if (cmdLine.hasOption(outOpt)) {
         File output = new File(cmdLine.getValue(outOpt).toString());
-        if (!output.exists()) {
-          if (!output.mkdirs()) {
-            throw new IOException("Could not create directory: " + output);
-          }
+        if (!output.exists() && !output.mkdirs()) {
+          throw new IOException("Could not create directory: " + output);
         }
         writeTopWords(topWords, output);
       } else {

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/SequenceFileDumper.java Tue Sep  7 13:54:21 2010
@@ -98,8 +98,8 @@ public final class SequenceFileDumper {
           sub = Integer.parseInt(cmdLine.getValue(substringOpt).toString());
         }
         boolean countOnly = cmdLine.hasOption(countOpt);
-        Writable key = (Writable) reader.getKeyClass().newInstance();
-        Writable value = (Writable) reader.getValueClass().newInstance();
+        Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+        Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
         writer.append("Key class: ").append(String.valueOf(reader.getKeyClass())).append(" Value Class: ")
         .append(String.valueOf(value.getClass())).append('\n');
         writer.flush();

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/clustering/ClusterDumper.java Tue Sep  7 13:54:21 2010
@@ -164,8 +164,8 @@ public final class ClusterDumper extends
         //System.out.println("Input Path: " + path); doesn't this interfere with output?
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
         try {
-          Writable key = (Writable) reader.getKeyClass().newInstance();
-          Writable value = (Writable) reader.getValueClass().newInstance();
+          Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+          Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
           while (reader.next(key, value)) {
             Cluster cluster = (Cluster) value;
             String fmtStr = useJSON ? cluster.asJsonString() : cluster.asFormatString(dictionary);
@@ -267,7 +267,7 @@ public final class ClusterDumper extends
     FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
       @Override
       public boolean accept(Path path) {
-        return !(path.getName().endsWith(".crc") || path.getName().equals("_logs"));
+        return !(path.getName().endsWith(".crc") || "_logs".equals(path.getName()));
       }
     });
 
@@ -275,8 +275,8 @@ public final class ClusterDumper extends
       Path path = file.getPath();
       SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
       try {
-        IntWritable key = (IntWritable) reader.getKeyClass().newInstance();
-        WeightedVectorWritable value = (WeightedVectorWritable) reader.getValueClass().newInstance();
+        IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
+        WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class).newInstance();
         while (reader.next(key, value)) {
           // value is the cluster id as an int, key is the name/id of the
           // vector, but that doesn't matter because we only care about printing
@@ -288,7 +288,7 @@ public final class ClusterDumper extends
             result.put(key.get(), pointList);
           }
           pointList.add(value);
-          value = (WeightedVectorWritable) reader.getValueClass().newInstance();
+          value = reader.getValueClass().asSubclass(WeightedVectorWritable.class).newInstance();
         }
       } catch (InstantiationException e) {
         log.error("Exception", e);

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/CollocMapper.java Tue Sep  7 13:54:21 2010
@@ -94,7 +94,8 @@ public class CollocMapper extends Mapper
     ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()), maxShingleSize);
     int count = 0; // ngram count
 
-    OpenObjectIntHashMap<String> ngrams = new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1));
+    OpenObjectIntHashMap<String> ngrams =
+        new OpenObjectIntHashMap<String>(value.getEntries().size() * (maxShingleSize - 1));
     OpenObjectIntHashMap<String> unigrams = new OpenObjectIntHashMap<String>(value.getEntries().size());
 
     do {

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKeyGroupComparator.java Tue Sep  7 13:54:21 2010
@@ -20,10 +20,12 @@ package org.apache.mahout.utils.nlp.coll
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 
+import java.io.Serializable;
+
 /** Group GramKeys based on their Gram, ignoring the secondary sort key, so that all keys with the same Gram are sent
  *  to the same call of the reduce method, sorted in natural order (for GramKeys).
  */
-class GramKeyGroupComparator extends WritableComparator {
+class GramKeyGroupComparator extends WritableComparator implements Serializable {
 
   GramKeyGroupComparator() {
     super(GramKey.class, true);

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/VectorDumper.java Tue Sep  7 13:54:21 2010
@@ -30,21 +30,21 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
 import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
-import org.apache.mahout.utils.vectors.SequenceFileVectorIterable.SeqFileIterator;
+import org.apache.mahout.math.VectorWritable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.FileWriter;
-import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 
 /**
- * Can read in a {@link org.apache.hadoop.io.SequenceFile} of {@link org.apache.mahout.math.Vector}s and dump
- * out the results using {@link org.apache.mahout.math.Vector#asFormatString()} to either the console or to a
+ * Can read in a {@link SequenceFile} of {@link Vector}s and dump
+ * out the results using {@link Vector#asFormatString()} to either the console or to a
  * file.
  */
 public final class VectorDumper {
@@ -54,7 +54,7 @@ public final class VectorDumper {
   private VectorDumper() {
   }
 
-  public static void main(String[] args) throws IOException {
+  public static void main(String[] args) throws Exception {
     DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
     ArgumentBuilder abuilder = new ArgumentBuilder();
     GroupBuilder gbuilder = new GroupBuilder();
@@ -124,21 +124,25 @@ public final class VectorDumper {
         boolean useJSON = cmdLine.hasOption(centroidJSonOpt);
         boolean sizeOnly = cmdLine.hasOption(sizeOpt);
         SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
+
+        Writable keyWritable = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+        Writable valueWritable = reader.getValueClass().asSubclass(Writable.class).newInstance();
+        boolean transposeKeyValue = cmdLine.hasOption(vectorAsKeyOpt);
         try {
-          Iterable<Vector> vectorIterable = new SequenceFileVectorIterable(reader, cmdLine.hasOption(vectorAsKeyOpt));
           Writer writer = cmdLine.hasOption(outputOpt)
                   ? new FileWriter(cmdLine.getValue(outputOpt).toString())
                   : new OutputStreamWriter(System.out);
           try {
             boolean printKey = cmdLine.hasOption(printKeyOpt);
-            SeqFileIterator iterator = (SeqFileIterator) vectorIterable.iterator();
             long i = 0;
-            while (iterator.hasNext()) {
-              Vector vector = iterator.next();
+            while (reader.next(keyWritable, valueWritable)) {
               if (printKey) {
-                writer.write(iterator.key().toString());
-                writer.write("\t");
+                Writable notTheVectorWritable = transposeKeyValue ? valueWritable : keyWritable;
+                writer.write(notTheVectorWritable.toString());
+                writer.write('\t');
               }
+              VectorWritable vectorWritable = (VectorWritable) (transposeKeyValue ? keyWritable : valueWritable);
+              Vector vector = vectorWritable.get();
               if (sizeOnly) {
                 if (vector instanceof NamedVector) {
                   writer.write(((NamedVector) vector).getName());
@@ -154,9 +158,7 @@ public final class VectorDumper {
                 writer.write(fmtStr);
                 writer.write('\n');
               }
-              //i++;
             }
-            //System.out.println("Dumped " + i + " Vectors");
           } finally {
             writer.close();
           }

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/Driver.java Tue Sep  7 13:54:21 2010
@@ -159,9 +159,9 @@ public final class Driver {
         Weight weight;
         if (cmdLine.hasOption(weightOpt)) {
           String wString = cmdLine.getValue(weightOpt).toString();
-          if (wString.equalsIgnoreCase("tf")) {
+          if ("tf".equalsIgnoreCase(wString)) {
             weight = new TF();
-          } else if (wString.equalsIgnoreCase("tfidf")) {
+          } else if ("tfidf".equalsIgnoreCase(wString)) {
             weight = new TFIDF();
           } else {
             throw new OptionException(weightOpt);
@@ -188,7 +188,7 @@ public final class Driver {
         double norm = LuceneIterable.NO_NORMALIZING;
         if (cmdLine.hasOption(powerOpt)) {
           String power = cmdLine.getValue(powerOpt).toString();
-          if (power.equals("INF")) {
+          if ("INF".equals(power)) {
             norm = Double.POSITIVE_INFINITY;
           } else {
             norm = Double.parseDouble(power);
@@ -213,7 +213,7 @@ public final class Driver {
         VectorWriter vectorWriter;
         if (cmdLine.hasOption(outWriterOpt)) {
           String outWriter = cmdLine.getValue(outWriterOpt).toString();
-          if (outWriter.equals("file")) {
+          if ("file".equals(outWriter)) {
             BufferedWriter writer = new BufferedWriter(new FileWriter(outFile));
             vectorWriter = new JWriterVectorWriter(writer);
           } else {

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=993365&r1=993364&r2=993365&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Tue Sep  7 13:54:21 2010
@@ -263,14 +263,14 @@ public final class TestClusterDumper ext
     FileSystem fs = FileSystem.get(eigenvectors.toUri(), conf);
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, eigenvectors, conf);
     try {
-      Writable key = (Writable) reader.getKeyClass().newInstance();
-      Writable value = (Writable) reader.getValueClass().newInstance();
+      Writable key = reader.getKeyClass().asSubclass(Writable.class).newInstance();
+      Writable value = reader.getValueClass().asSubclass(Writable.class).newInstance();
       i = 0;
       while (reader.next(key, value)) {
         Vector v = ((VectorWritable) value).get();
         p.assignColumn(i, v);
         System.out.println("k=" + key.toString() + " V=" + AbstractCluster.formatVector(v, termDictionary));
-        value = (Writable) reader.getValueClass().newInstance();
+        value = reader.getValueClass().asSubclass(Writable.class).newInstance();
         i++;
       }
     } finally {



Mime
View raw message