mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gsing...@apache.org
Subject svn commit: r891983 [16/47] - in /lucene/mahout/trunk: ./ core/ core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ core/src/main/java/org/apache/mahout/clustering/ core/src/main/java/org/apache/mahout/clustering/canopy/ core/src/main/java/org/a...
Date Thu, 17 Dec 2009 23:22:41 GMT
Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Probability.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Probability.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Probability.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Probability.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,793 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat;
+
+import org.apache.mahout.math.jet.math.Polynomial;
+
+/** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
+@Deprecated
+public class Probability extends org.apache.mahout.math.jet.math.Constants {
+
+  /**
+   * ********************************************** COEFFICIENTS FOR METHOD  normalInverse()   *
+   * ***********************************************
+   */
+  /* approximation for 0 <= |y - 0.5| <= 3/8 */
+  private static final double[] P0 = {
+      -5.99633501014107895267E1,
+      9.80010754185999661536E1,
+      -5.66762857469070293439E1,
+      1.39312609387279679503E1,
+      -1.23916583867381258016E0,
+  };
+  private static final double[] Q0 = {
+      /* 1.00000000000000000000E0,*/
+      1.95448858338141759834E0,
+      4.67627912898881538453E0,
+      8.63602421390890590575E1,
+      -2.25462687854119370527E2,
+      2.00260212380060660359E2,
+      -8.20372256168333339912E1,
+      1.59056225126211695515E1,
+      -1.18331621121330003142E0,
+  };
+
+
+  /* Approximation for interval z = sqrt(-2 log y ) between 2 and 8
+   * i.e., y between exp(-2) = .135 and exp(-32) = 1.27e-14.
+   */
+  private static final double[] P1 = {
+      4.05544892305962419923E0,
+      3.15251094599893866154E1,
+      5.71628192246421288162E1,
+      4.40805073893200834700E1,
+      1.46849561928858024014E1,
+      2.18663306850790267539E0,
+      -1.40256079171354495875E-1,
+      -3.50424626827848203418E-2,
+      -8.57456785154685413611E-4,
+  };
+  private static final double[] Q1 = {
+      /*  1.00000000000000000000E0,*/
+      1.57799883256466749731E1,
+      4.53907635128879210584E1,
+      4.13172038254672030440E1,
+      1.50425385692907503408E1,
+      2.50464946208309415979E0,
+      -1.42182922854787788574E-1,
+      -3.80806407691578277194E-2,
+      -9.33259480895457427372E-4,
+  };
+
+  /* Approximation for interval z = sqrt(-2 log y ) between 8 and 64
+   * i.e., y between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
+   */
+  private static final double[] P2 = {
+      3.23774891776946035970E0,
+      6.91522889068984211695E0,
+      3.93881025292474443415E0,
+      1.33303460815807542389E0,
+      2.01485389549179081538E-1,
+      1.23716634817820021358E-2,
+      3.01581553508235416007E-4,
+      2.65806974686737550832E-6,
+      6.23974539184983293730E-9,
+  };
+  private static final double[] Q2 = {
+      /*  1.00000000000000000000E0,*/
+      6.02427039364742014255E0,
+      3.67983563856160859403E0,
+      1.37702099489081330271E0,
+      2.16236993594496635890E-1,
+      1.34204006088543189037E-2,
+      3.28014464682127739104E-4,
+      2.89247864745380683936E-6,
+      6.79019408009981274425E-9,
+  };
+
+  /** Makes this class non instantiable, but still let's others inherit from it. */
+  protected Probability() {
+  }
+
+  /**
+   * Returns the area from zero to <tt>x</tt> under the beta density function.
+   * <pre>
+   *                          x
+   *            -             -
+   *           | (a+b)       | |  a-1      b-1
+   * P(x)  =  ----------     |   t    (1-t)    dt
+   *           -     -     | |
+   *          | (a) | (b)   -
+   *                         0
+   * </pre>
+   * This function is identical to the incomplete beta integral function <tt>Gamma.incompleteBeta(a, b, x)</tt>.
+   *
+   * The complemented function is
+   *
+   * <tt>1 - P(1-x)  =  Gamma.incompleteBeta( b, a, x )</tt>;
+   */
+  public static double beta(double a, double b, double x) {
+    return Gamma.incompleteBeta(a, b, x);
+  }
+
+  /**
+   * Returns the area under the right hand tail (from <tt>x</tt> to infinity) of the beta density function.
+   *
+   * This function is identical to the incomplete beta integral function <tt>Gamma.incompleteBeta(b, a, x)</tt>.
+   */
+  public static double betaComplemented(double a, double b, double x) {
+    return Gamma.incompleteBeta(b, a, x);
+  }
+
+  /**
+   * Returns the sum of the terms <tt>0</tt> through <tt>k</tt> of the Binomial probability density.
+   * <pre>
+   *   k
+   *   --  ( n )   j      n-j
+   *   >   (   )  p  (1-p)
+   *   --  ( j )
+   *  j=0
+   * </pre>
+   * The terms are not summed directly; instead the incomplete beta integral is employed, according to the formula <p>
+   * <tt>y = binomial( k, n, p ) = Gamma.incompleteBeta( n-k, k+1, 1-p )</tt>. <p> All arguments must be positive,
+   *
+   * @param k end term.
+   * @param n the number of trials.
+   * @param p the probability of success (must be in <tt>(0.0,1.0)</tt>).
+   */
+  public static double binomial(int k, int n, double p) {
+    if ((p < 0.0) || (p > 1.0)) {
+      throw new IllegalArgumentException();
+    }
+    if ((k < 0) || (n < k)) {
+      throw new IllegalArgumentException();
+    }
+
+    if (k == n) {
+      return (1.0);
+    }
+    if (k == 0) {
+      return Math.pow(1.0 - p, n);
+    }
+
+    return Gamma.incompleteBeta(n - k, k + 1, 1.0 - p);
+  }
+
+  /**
+   * Returns the sum of the terms <tt>k+1</tt> through <tt>n</tt> of the Binomial probability density.
+   * <pre>
+   *   n
+   *   --  ( n )   j      n-j
+   *   >   (   )  p  (1-p)
+   *   --  ( j )
+   *  j=k+1
+   * </pre>
+   * The terms are not summed directly; instead the incomplete beta integral is employed, according to the formula <p>
+   * <tt>y = binomialComplemented( k, n, p ) = Gamma.incompleteBeta( k+1, n-k, p )</tt>. <p> All arguments must be
+   * positive,
+   *
+   * @param k end term.
+   * @param n the number of trials.
+   * @param p the probability of success (must be in <tt>(0.0,1.0)</tt>).
+   */
+  public static double binomialComplemented(int k, int n, double p) {
+    if ((p < 0.0) || (p > 1.0)) {
+      throw new IllegalArgumentException();
+    }
+    if ((k < 0) || (n < k)) {
+      throw new IllegalArgumentException();
+    }
+
+    if (k == n) {
+      return (0.0);
+    }
+    if (k == 0) {
+      return 1.0 - Math.pow(1.0 - p, n);
+    }
+
+    return Gamma.incompleteBeta(k + 1, n - k, p);
+  }
+
+  /**
+   * Returns the area under the left hand tail (from 0 to <tt>x</tt>) of the Chi square probability density function
+   * with <tt>v</tt> degrees of freedom.
+   * <pre>
+   *                                  inf.
+   *                                    -
+   *                        1          | |  v/2-1  -t/2
+   *  P( x | v )   =   -----------     |   t      e     dt
+   *                    v/2  -       | |
+   *                   2    | (v/2)   -
+   *                                   x
+   * </pre>
+   * where <tt>x</tt> is the Chi-square variable. <p> The incomplete gamma integral is used, according to the formula
+   * <p> <tt>y = chiSquare( v, x ) = incompleteGamma( v/2.0, x/2.0 )</tt>. <p> The arguments must both be positive.
+   *
+   * @param v degrees of freedom.
+   * @param x integration end point.
+   */
+  public static double chiSquare(double v, double x) throws ArithmeticException {
+    if (x < 0.0 || v < 1.0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGamma(v / 2.0, x / 2.0);
+  }
+
+  /**
+   * Returns the area under the right hand tail (from <tt>x</tt> to infinity) of the Chi square probability density
+   * function with <tt>v</tt> degrees of freedom.
+   * <pre>
+   *                                  inf.
+   *                                    -
+   *                        1          | |  v/2-1  -t/2
+   *  P( x | v )   =   -----------     |   t      e     dt
+   *                    v/2  -       | |
+   *                   2    | (v/2)   -
+   *                                   x
+   * </pre>
+   * where <tt>x</tt> is the Chi-square variable.
+   *
+   * The incomplete gamma integral is used, according to the formula
+   *
+   * <tt>y = chiSquareComplemented( v, x ) = incompleteGammaComplement( v/2.0, x/2.0 )</tt>.
+   *
+   *
+   * The arguments must both be positive.
+   *
+   * @param v degrees of freedom.
+   */
+  public static double chiSquareComplemented(double v, double x) throws ArithmeticException {
+    if (x < 0.0 || v < 1.0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGammaComplement(v / 2.0, x / 2.0);
+  }
+
+  /**
+   * Returns the error function of the normal distribution; formerly named <tt>erf</tt>. The integral is
+   * <pre>
+   *                           x
+   *                            -
+   *                 2         | |          2
+   *   erf(x)  =  --------     |    exp( - t  ) dt.
+   *              sqrt(pi)   | |
+   *                          -
+   *                           0
+   * </pre>
+   * <b>Implementation:</b> For <tt>0 <= |x| < 1, erf(x) = x * P4(x**2)/Q5(x**2)</tt>; otherwise <tt>erf(x) = 1 -
+   * erfc(x)</tt>. <p> Code adapted from the <A HREF="http://www.sci.usq.edu.au/staff/leighb/graph/Top.html">Java 2D
+   * Graph Package 2.4</A>, which in turn is a port from the <A HREF="http://people.ne.mediaone.net/moshier/index.html#Cephes">Cephes
+   * 2.2</A> Math Library (C).
+   *
+   * @param x the argument to the function.
+   */
+  public static double errorFunction(double x) throws ArithmeticException {
+    double[] T = {
+        9.60497373987051638749E0,
+        9.00260197203842689217E1,
+        2.23200534594684319226E3,
+        7.00332514112805075473E3,
+        5.55923013010394962768E4
+    };
+    double[] U = {
+        //1.00000000000000000000E0,
+        3.35617141647503099647E1,
+        5.21357949780152679795E2,
+        4.59432382970980127987E3,
+        2.26290000613890934246E4,
+        4.92673942608635921086E4
+    };
+
+    if (Math.abs(x) > 1.0) {
+      return (1.0 - errorFunctionComplemented(x));
+    }
+    double z = x * x;
+    double y = x * Polynomial.polevl(z, T, 4) / Polynomial.p1evl(z, U, 5);
+    return y;
+  }
+
+  /**
+   * Returns the complementary Error function of the normal distribution; formerly named <tt>erfc</tt>.
+   * <pre>
+   *  1 - erf(x) =
+   *
+   *                           inf.
+   *                             -
+   *                  2         | |          2
+   *   erfc(x)  =  --------     |    exp( - t  ) dt
+   *               sqrt(pi)   | |
+   *                           -
+   *                            x
+   * </pre>
+   * <b>Implementation:</b> For small x, <tt>erfc(x) = 1 - erf(x)</tt>; otherwise rational approximations are computed.
+   * <p> Code adapted from the <A HREF="http://www.sci.usq.edu.au/staff/leighb/graph/Top.html">Java 2D Graph Package
+   * 2.4</A>, which in turn is a port from the <A HREF="http://people.ne.mediaone.net/moshier/index.html#Cephes">Cephes
+   * 2.2</A> Math Library (C).
+   *
+   * @param a the argument to the function.
+   */
+  public static double errorFunctionComplemented(double a) throws ArithmeticException {
+    double x;
+
+    double[] P = {
+        2.46196981473530512524E-10,
+        5.64189564831068821977E-1,
+        7.46321056442269912687E0,
+        4.86371970985681366614E1,
+        1.96520832956077098242E2,
+        5.26445194995477358631E2,
+        9.34528527171957607540E2,
+        1.02755188689515710272E3,
+        5.57535335369399327526E2
+    };
+    double[] Q = {
+        //1.0
+        1.32281951154744992508E1,
+        8.67072140885989742329E1,
+        3.54937778887819891062E2,
+        9.75708501743205489753E2,
+        1.82390916687909736289E3,
+        2.24633760818710981792E3,
+        1.65666309194161350182E3,
+        5.57535340817727675546E2
+    };
+
+    double[] R = {
+        5.64189583547755073984E-1,
+        1.27536670759978104416E0,
+        5.01905042251180477414E0,
+        6.16021097993053585195E0,
+        7.40974269950448939160E0,
+        2.97886665372100240670E0
+    };
+    double[] S = {
+        //1.00000000000000000000E0,
+        2.26052863220117276590E0,
+        9.39603524938001434673E0,
+        1.20489539808096656605E1,
+        1.70814450747565897222E1,
+        9.60896809063285878198E0,
+        3.36907645100081516050E0
+    };
+
+    if (a < 0.0) {
+      x = -a;
+    } else {
+      x = a;
+    }
+
+    if (x < 1.0) {
+      return 1.0 - errorFunction(a);
+    }
+
+    double z = -a * a;
+
+    if (z < -MAXLOG) {
+      if (a < 0) {
+        return (2.0);
+      } else {
+        return (0.0);
+      }
+    }
+
+    z = Math.exp(z);
+
+    double q;
+    double p;
+    if (x < 8.0) {
+      p = Polynomial.polevl(x, P, 8);
+      q = Polynomial.p1evl(x, Q, 8);
+    } else {
+      p = Polynomial.polevl(x, R, 5);
+      q = Polynomial.p1evl(x, S, 6);
+    }
+
+    double y = (z * p) / q;
+
+    if (a < 0) {
+      y = 2.0 - y;
+    }
+
+    if (y == 0.0) {
+      if (a < 0) {
+        return 2.0;
+      } else {
+        return (0.0);
+      }
+    }
+
+    return y;
+  }
+
+  /**
+   * Returns the integral from zero to <tt>x</tt> of the gamma probability density function.
+   * <pre>
+   *                x
+   *        b       -
+   *       a       | |   b-1  -at
+   * y =  -----    |    t    e    dt
+   *       -     | |
+   *      | (b)   -
+   *               0
+   * </pre>
+   * The incomplete gamma integral is used, according to the relation
+   *
+   * <tt>y = Gamma.incompleteGamma( b, a*x )</tt>.
+   *
+   * @param a the paramater a (alpha) of the gamma distribution.
+   * @param b the paramater b (beta, lambda) of the gamma distribution.
+   * @param x integration end point.
+   */
+  public static double gamma(double a, double b, double x) {
+    if (x < 0.0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGamma(b, a * x);
+  }
+
+  /**
+   * Returns the integral from <tt>x</tt> to infinity of the gamma probability density function:
+   * <pre>
+   *               inf.
+   *        b       -
+   *       a       | |   b-1  -at
+   * y =  -----    |    t    e    dt
+   *       -     | |
+   *      | (b)   -
+   *               x
+   * </pre>
+   * The incomplete gamma integral is used, according to the relation <p> y = Gamma.incompleteGammaComplement( b, a*x
+   * ).
+   *
+   * @param a the paramater a (alpha) of the gamma distribution.
+   * @param b the paramater b (beta, lambda) of the gamma distribution.
+   * @param x integration end point.
+   */
+  public static double gammaComplemented(double a, double b, double x) {
+    if (x < 0.0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGammaComplement(b, a * x);
+  }
+
+  /**
+   * Returns the sum of the terms <tt>0</tt> through <tt>k</tt> of the Negative Binomial Distribution.
+   * <pre>
+   *   k
+   *   --  ( n+j-1 )   n      j
+   *   >   (       )  p  (1-p)
+   *   --  (   j   )
+   *  j=0
+   * </pre>
+   * In a sequence of Bernoulli trials, this is the probability that <tt>k</tt> or fewer failures precede the
+   * <tt>n</tt>-th success. <p> The terms are not computed individually; instead the incomplete beta integral is
+   * employed, according to the formula <p> <tt>y = negativeBinomial( k, n, p ) = Gamma.incompleteBeta( n, k+1, p
+   * )</tt>.
+   *
+   * All arguments must be positive,
+   *
+   * @param k end term.
+   * @param n the number of trials.
+   * @param p the probability of success (must be in <tt>(0.0,1.0)</tt>).
+   */
+  public static double negativeBinomial(int k, int n, double p) {
+    if ((p < 0.0) || (p > 1.0)) {
+      throw new IllegalArgumentException();
+    }
+    if (k < 0) {
+      return 0.0;
+    }
+
+    return Gamma.incompleteBeta(n, k + 1, p);
+  }
+
+  /**
+   * Returns the sum of the terms <tt>k+1</tt> to infinity of the Negative Binomial distribution.
+   * <pre>
+   *   inf
+   *   --  ( n+j-1 )   n      j
+   *   >   (       )  p  (1-p)
+   *   --  (   j   )
+   *  j=k+1
+   * </pre>
+   * The terms are not computed individually; instead the incomplete beta integral is employed, according to the formula
+   * <p> y = negativeBinomialComplemented( k, n, p ) = Gamma.incompleteBeta( k+1, n, 1-p ).
+   *
+   * All arguments must be positive,
+   *
+   * @param k end term.
+   * @param n the number of trials.
+   * @param p the probability of success (must be in <tt>(0.0,1.0)</tt>).
+   */
+  public static double negativeBinomialComplemented(int k, int n, double p) {
+    if ((p < 0.0) || (p > 1.0)) {
+      throw new IllegalArgumentException();
+    }
+    if (k < 0) {
+      return 0.0;
+    }
+
+    return Gamma.incompleteBeta(k + 1, n, 1.0 - p);
+  }
+
+  /**
+   * Returns the area under the Normal (Gaussian) probability density function, integrated from minus infinity to
+   * <tt>x</tt> (assumes mean is zero, variance is one).
+   * <pre>
+   *                            x
+   *                             -
+   *                   1        | |          2
+   *  normal(x)  = ---------    |    exp( - t /2 ) dt
+   *               sqrt(2pi)  | |
+   *                           -
+   *                          -inf.
+   *
+   *             =  ( 1 + erf(z) ) / 2
+   *             =  erfc(z) / 2
+   * </pre>
+   * where <tt>z = x/sqrt(2)</tt>. Computation is via the functions <tt>errorFunction</tt> and
+   * <tt>errorFunctionComplement</tt>.
+   */
+  public static double normal(double a) throws ArithmeticException {
+
+    double x = a * SQRTH;
+    double z = Math.abs(x);
+
+    double y;
+    if (z < SQRTH) {
+      y = 0.5 + 0.5 * errorFunction(x);
+    } else {
+      y = 0.5 * errorFunctionComplemented(z);
+      if (x > 0) {
+        y = 1.0 - y;
+      }
+    }
+
+    return y;
+  }
+
+  /**
+   * Returns the area under the Normal (Gaussian) probability density function, integrated from minus infinity to
+   * <tt>x</tt>.
+   * <pre>
+   *                            x
+   *                             -
+   *                   1        | |                 2
+   *  normal(x)  = ---------    |    exp( - (t-mean) / 2v ) dt
+   *               sqrt(2pi*v)| |
+   *                           -
+   *                          -inf.
+   *
+   * </pre>
+   * where <tt>v = variance</tt>. Computation is via the functions <tt>errorFunction</tt>.
+   *
+   * @param mean     the mean of the normal distribution.
+   * @param variance the variance of the normal distribution.
+   * @param x        the integration limit.
+   */
+  public static double normal(double mean, double variance, double x) throws ArithmeticException {
+    if (x > 0) {
+      return 0.5 + 0.5 * errorFunction((x - mean) / Math.sqrt(2.0 * variance));
+    } else {
+      return 0.5 - 0.5 * errorFunction((-(x - mean)) / Math.sqrt(2.0 * variance));
+    }
+  }
+
+  /**
+   * Returns the value, <tt>x</tt>, for which the area under the Normal (Gaussian) probability density function
+   * (integrated from minus infinity to <tt>x</tt>) is equal to the argument <tt>y</tt> (assumes mean is zero, variance
+   * is one); formerly named <tt>ndtri</tt>. <p> For small arguments <tt>0 < y < exp(-2)</tt>, the program computes
+   * <tt>z = sqrt( -2.0 * log(y) )</tt>;  then the approximation is <tt>x = z - log(z)/z  - (1/z) P(1/z) / Q(1/z)</tt>.
+   * There are two rational functions P/Q, one for <tt>0 < y < exp(-32)</tt> and the other for <tt>y</tt> up to
+   * <tt>exp(-2)</tt>. For larger arguments, <tt>w = y - 0.5</tt>, and  <tt>x/sqrt(2pi) = w + w**3
+   * R(w**2)/S(w**2))</tt>.
+   */
+  public static double normalInverse(double y0) throws ArithmeticException {
+
+    double s2pi = Math.sqrt(2.0 * Math.PI);
+
+    if (y0 <= 0.0) {
+      throw new IllegalArgumentException();
+    }
+    if (y0 >= 1.0) {
+      throw new IllegalArgumentException();
+    }
+    int code = 1;
+    double y = y0;
+    if (y > (1.0 - 0.13533528323661269189)) { /* 0.135... = exp(-2) */
+      y = 1.0 - y;
+      code = 0;
+    }
+
+    double x;
+    if (y > 0.13533528323661269189) {
+      y -= 0.5;
+      double y2 = y * y;
+      x = y + y * (y2 * Polynomial.polevl(y2, P0, 4) / Polynomial.p1evl(y2, Q0, 8));
+      x *= s2pi;
+      return (x);
+    }
+
+    x = Math.sqrt(-2.0 * Math.log(y));
+    double x0 = x - Math.log(x) / x;
+
+    double z = 1.0 / x;
+    double x1;
+    if (x < 8.0) /* y > exp(-32) = 1.2664165549e-14 */ {
+      x1 = z * Polynomial.polevl(z, P1, 8) / Polynomial.p1evl(z, Q1, 8);
+    } else {
+      x1 = z * Polynomial.polevl(z, P2, 8) / Polynomial.p1evl(z, Q2, 8);
+    }
+    x = x0 - x1;
+    if (code != 0) {
+      x = -x;
+    }
+    return (x);
+  }
+
+  /**
+   * Returns the sum of the first <tt>k</tt> terms of the Poisson distribution.
+   * <pre>
+   *   k         j
+   *   --   -m  m
+   *   >   e    --
+   *   --       j!
+   *  j=0
+   * </pre>
+   * The terms are not summed directly; instead the incomplete gamma integral is employed, according to the relation <p>
+   * <tt>y = poisson( k, m ) = Gamma.incompleteGammaComplement( k+1, m )</tt>.
+   *
+   * The arguments must both be positive.
+   *
+   * @param k    number of terms.
+   * @param mean the mean of the poisson distribution.
+   */
+  public static double poisson(int k, double mean) throws ArithmeticException {
+    if (mean < 0) {
+      throw new IllegalArgumentException();
+    }
+    if (k < 0) {
+      return 0.0;
+    }
+    return Gamma.incompleteGammaComplement((double) (k + 1), mean);
+  }
+
+  /**
+   * Returns the sum of the terms <tt>k+1</tt> to <tt>Infinity</tt> of the Poisson distribution.
+   * <pre>
+   *  inf.       j
+   *   --   -m  m
+   *   >   e    --
+   *   --       j!
+   *  j=k+1
+   * </pre>
+   * The terms are not summed directly; instead the incomplete gamma integral is employed, according to the formula <p>
+   * <tt>y = poissonComplemented( k, m ) = Gamma.incompleteGamma( k+1, m )</tt>.
+   *
+   * The arguments must both be positive.
+   *
+   * @param k    start term.
+   * @param mean the mean of the poisson distribution.
+   */
+  public static double poissonComplemented(int k, double mean) throws ArithmeticException {
+    if (mean < 0) {
+      throw new IllegalArgumentException();
+    }
+    if (k < -1) {
+      return 0.0;
+    }
+    return Gamma.incompleteGamma((double) (k + 1), mean);
+  }
+
+  /**
+   * Returns the integral from minus infinity to <tt>t</tt> of the Student-t distribution with <tt>k &gt; 0</tt> degrees
+   * of freedom.
+   * <pre>
+   *                                      t
+   *                                      -
+   *                                     | |
+   *              -                      |         2   -(k+1)/2
+   *             | ( (k+1)/2 )           |  (     x   )
+   *       ----------------------        |  ( 1 + --- )        dx
+   *                     -               |  (      k  )
+   *       sqrt( k pi ) | ( k/2 )        |
+   *                                   | |
+   *                                    -
+   *                                   -inf.
+   * </pre>
+   * Relation to incomplete beta integral: <p> <tt>1 - studentT(k,t) = 0.5 * Gamma.incompleteBeta( k/2, 1/2, z )</tt>
+   * where <tt>z = k/(k + t**2)</tt>. <p> Since the function is symmetric about <tt>t=0</tt>, the area under the right
+   * tail of the density is found by calling the function with <tt>-t</tt> instead of <tt>t</tt>.
+   *
+   * @param k degrees of freedom.
+   * @param t integration end point.
+   */
+  public static double studentT(double k, double t) throws ArithmeticException {
+    if (k <= 0) {
+      throw new IllegalArgumentException();
+    }
+    if (t == 0) {
+      return (0.5);
+    }
+
+    double cdf = 0.5 * Gamma.incompleteBeta(0.5 * k, 0.5, k / (k + t * t));
+
+    if (t >= 0) {
+      cdf = 1.0 - cdf;
+    } // fixes bug reported by stefan.bentink@molgen.mpg.de
+
+    return cdf;
+  }
+
+  /**
+   * Returns the value, <tt>t</tt>, for which the area under the Student-t probability density function (integrated from
+   * minus infinity to <tt>t</tt>) is equal to <tt>1-alpha/2</tt>. The value returned corresponds to usual Student
+   * t-distribution lookup table for <tt>t<sub>alpha[size]</sub></tt>. <p> The function uses the studentT function to
+   * determine the return value iteratively.
+   *
+   * @param alpha probability
+   * @param size  size of data set
+   */
+  public static double studentTInverse(double alpha, int size) {
+    double cumProb = 1 - alpha / 2; // Cumulative probability
+
+    double x1 = normalInverse(cumProb);
+
+    // Return inverse of normal for large size
+    if (size > 200) {
+      return x1;
+    }
+
+    // Find a pair of x1,x2 that braket zero
+    double f1 = studentT(size, x1) - cumProb;
+    double x2 = x1;
+    double f2 = f1;
+    do {
+      if (f1 > 0) {
+        x2 /= 2;
+      } else {
+        x2 += x1;
+      }
+      f2 = studentT(size, x2) - cumProb;
+    } while (f1 * f2 > 0);
+
+    // Find better approximation
+    // Pegasus-method
+    do {
+      // Calculate slope of secant and t value for which it is 0.
+      double s12 = (f2 - f1) / (x2 - x1);
+      double x3 = x2 - f2 / s12;
+
+      // Calculate function value at x3
+      double f3 = studentT(size, x3) - cumProb;
+      if (Math.abs(f3) < 1e-8) { // This criteria needs to be very tight!
+        // We found a perfect value -> return
+        return x3;
+      }
+
+      if (f3 * f2 < 0) {
+        x1 = x2;
+        f1 = f2;
+        x2 = x3;
+        f2 = f3;
+      } else {
+        double g = f2 / (f2 + f3);
+        f1 = g * f1;
+        x2 = x3;
+        f2 = f3;
+      }
+    } while (Math.abs(x2 - x1) > 0.001);
+
+    if (Math.abs(f2) <= Math.abs(f1)) {
+      return x2;
+    } else {
+      return x1;
+    }
+  }
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/Probability.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/package.html
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/package.html?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/package.html (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/package.html Thu Dec 17 23:22:16 2009
@@ -0,0 +1,6 @@
+<HTML>
+<BODY>
+Tools for basic and advanced statistics: Estimators, Gamma functions, Beta functions, Probabilities, Special integrals,
+etc.
+</BODY>
+</HTML>

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Buffer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Buffer.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Buffer.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Buffer.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,77 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.PersistentObject;
+
+/** A buffer holding elements; internally used for computing approximate quantiles. */
+abstract class Buffer extends PersistentObject {
+
+  protected int weight;
+  private int level;
+  protected final int k;
+  protected boolean isAllocated;
+
+  /**
+   * This method was created in VisualAge.
+   *
+   * @param k int
+   */
+  Buffer(int k) {
+    this.k = k;
+    this.weight = 1;
+    this.level = 0;
+    this.isAllocated = false;
+  }
+
+  /** Clears the receiver. */
+  public abstract void clear();
+
+  /** Returns whether the receiver is already allocated. */
+  public boolean isAllocated() {
+    return isAllocated;
+  }
+
+  /** Returns whether the receiver is empty. */
+  public abstract boolean isEmpty();
+
+  /** Returns whether the receiver is empty. */
+  public abstract boolean isFull();
+
+  /** Returns whether the receiver is partial. */
+  public boolean isPartial() {
+    return !(isEmpty() || isFull());
+  }
+
+  /** Returns whether the receiver's level. */
+  public int level() {
+    return level;
+  }
+
+  /** Sets the receiver's level. */
+  public void level(int level) {
+    this.level = level;
+  }
+
+  /** Returns the number of elements contained in the receiver. */
+  public abstract int size();
+
+  /** Sorts the receiver. */
+  public abstract void sort();
+
+  /** Returns whether the receiver's weight. */
+  public int weight() {
+    return weight;
+  }
+
+  /** Sets the receiver's weight. */
+  public void weight(int weight) {
+    this.weight = weight;
+  }
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/Buffer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/BufferSet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/BufferSet.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/BufferSet.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/BufferSet.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,16 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.PersistentObject;
+
+/** An abstract set of buffers; internally used for computing approximate quantiles. */
+abstract class BufferSet extends PersistentObject {
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/BufferSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBuffer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBuffer.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBuffer.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBuffer.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,139 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.list.DoubleArrayList;
+
+/** A buffer holding <tt>double</tt> elements; internally used for computing approximate quantiles. */
+class DoubleBuffer extends Buffer {
+
+  protected DoubleArrayList values;
+  protected boolean isSorted;
+
+  /**
+   * This method was created in VisualAge.
+   *
+   * @param k int
+   */
+  DoubleBuffer(int k) {
+    super(k);
+    this.values = new DoubleArrayList(0);
+    this.isSorted = false;
+  }
+
+  /** Adds a value to the receiver. */
+  public void add(double value) {
+    if (!isAllocated) {
+      allocate();
+    } // lazy buffer allocation can safe memory.
+    values.add(value);
+    this.isSorted = false;
+  }
+
+  /** Adds a value to the receiver. */
+  public void addAllOfFromTo(DoubleArrayList elements, int from, int to) {
+    if (!isAllocated) {
+      allocate();
+    } // lazy buffer allocation can safe memory.
+    values.addAllOfFromTo(elements, from, to);
+    this.isSorted = false;
+  }
+
+  /** Allocates the receiver. */
+  protected void allocate() {
+    isAllocated = true;
+    values.ensureCapacity(k);
+  }
+
+  /** Clears the receiver. */
+  @Override
+  public void clear() {
+    values.clear();
+  }
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @Override
+  public Object clone() {
+    DoubleBuffer copy = (DoubleBuffer) super.clone();
+    if (this.values != null) {
+      copy.values = copy.values.copy();
+    }
+    return copy;
+  }
+
+  /** Returns whether the specified element is contained in the receiver. */
+  public boolean contains(double element) {
+    this.sort();
+    return values.contains(element);
+  }
+
+  /** Returns whether the receiver is empty. */
+  @Override
+  public boolean isEmpty() {
+    return values.isEmpty();
+  }
+
+  /** Returns whether the receiver is empty. */
+  @Override
+  public boolean isFull() {
+    return values.size() == k;
+  }
+
+  /**
+   * Returns the number of elements currently needed to store all contained elements. This number usually differs from
+   * the results of method <tt>size()</tt>, according to the underlying algorithm.
+   */
+  public int memory() {
+    return values.elements().length;
+  }
+
+  /**
+   * Returns the rank of a given element within the sorted sequence of the receiver. A rank is the number of elements <=
+   * element. Ranks are of the form {1,2,...size()}. If no element is <= element, then the rank is zero. If the element
+   * lies in between two contained elements, then uses linear interpolation.
+   *
+   * @param element the element to search for
+   * @return the rank of the element.
+   */
+  public double rank(double element) {
+    this.sort();
+    return org.apache.mahout.math.jet.stat.Descriptive.rankInterpolated(this.values, element);
+  }
+
+  /** Returns the number of elements contained in the receiver. */
+  @Override
+  public int size() {
+    return values.size();
+  }
+
+  /** Sorts the receiver. */
+  @Override
+  public void sort() {
+    if (!this.isSorted) {
+      // IMPORTANT: TO DO : replace mergeSort with quickSort!
+      // currently it is mergeSort only for debugging purposes (JDK 1.2 can't be imported into VisualAge).
+      values.sort();
+      //values.mergeSort();
+      this.isSorted = true;
+    }
+  }
+
+  /** Returns a String representation of the receiver. */
+  public String toString() {
+    return "k=" + this.k +
+        ", w=" + Long.toString(weight()) +
+        ", l=" + Integer.toString(level()) +
+        ", size=" + values.size();
+    //", v=" + values.toString();
+  }
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBuffer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBufferSet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBufferSet.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBufferSet.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBufferSet.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,419 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.function.DoubleProcedure;
+
+/** A set of buffers holding <tt>double</tt> elements; internally used for computing approximate quantiles. */
+class DoubleBufferSet extends BufferSet {
+
+  protected DoubleBuffer[] buffers;
+  private boolean nextTriggerCalculationState; //tmp var only
+
+  /**
+   * Constructs a buffer set with b buffers, each having k elements
+   *
+   * @param b the number of buffers
+   * @param k the number of elements per buffer
+   */
+  DoubleBufferSet(int b, int k) {
+    this.buffers = new DoubleBuffer[b];
+    this.clear(k);
+  }
+
+  /**
+   * Returns an empty buffer if at least one exists. Preferably returns a buffer which has already been used, i.e. a
+   * buffer which has already been allocated.
+   */
+  public DoubleBuffer _getFirstEmptyBuffer() {
+    DoubleBuffer emptyBuffer = null;
+    for (int i = buffers.length; --i >= 0;) {
+      if (buffers[i].isEmpty()) {
+        if (buffers[i].isAllocated()) {
+          return buffers[i];
+        }
+        emptyBuffer = buffers[i];
+      }
+    }
+
+    return emptyBuffer;
+  }
+
+  /** Returns all full or partial buffers. */
+  public DoubleBuffer[] _getFullOrPartialBuffers() {
+    //count buffers
+    int count = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      if (!buffers[i].isEmpty()) {
+        count++;
+      }
+    }
+
+    //collect buffers
+    DoubleBuffer[] collectedBuffers = new DoubleBuffer[count];
+    int j = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      if (!buffers[i].isEmpty()) {
+        collectedBuffers[j++] = buffers[i];
+      }
+    }
+
+    return collectedBuffers;
+  }
+
+  /**
+   * Determines all full buffers having the specified level.
+   *
+   * @return all full buffers having the specified level
+   */
+  public DoubleBuffer[] _getFullOrPartialBuffersWithLevel(int level) {
+    //count buffers
+    int count = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      if ((!buffers[i].isEmpty()) && buffers[i].level() == level) {
+        count++;
+      }
+    }
+
+    //collect buffers
+    DoubleBuffer[] collectedBuffers = new DoubleBuffer[count];
+    int j = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      if ((!buffers[i].isEmpty()) && buffers[i].level() == level) {
+        collectedBuffers[j++] = buffers[i];
+      }
+    }
+
+    return collectedBuffers;
+  }
+
+  /** @return The minimum level of all buffers which are full. */
+  public int _getMinLevelOfFullOrPartialBuffers() {
+    int b = this.b();
+    int minLevel = Integer.MAX_VALUE;
+
+    for (int i = 0; i < b; i++) {
+      DoubleBuffer buffer = this.buffers[i];
+      if ((!buffer.isEmpty()) && (buffer.level() < minLevel)) {
+        minLevel = buffer.level();
+      }
+    }
+    return minLevel;
+  }
+
+  /** Returns the number of empty buffers. */
+  public int _getNumberOfEmptyBuffers() {
+    int count = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      if (buffers[i].isEmpty()) {
+        count++;
+      }
+    }
+
+    return count;
+  }
+
+  /** Returns all empty buffers. */
+  public DoubleBuffer _getPartialBuffer() {
+    for (int i = buffers.length; --i >= 0;) {
+      if (buffers[i].isPartial()) {
+        return buffers[i];
+      }
+    }
+    return null;
+  }
+
+  /** @return the number of buffers */
+  public int b() {
+    return buffers.length;
+  }
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after this call returns, and its memory
+   * requirements will be close to zero.
+   */
+  public void clear() {
+    clear(k());
+  }
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after this call returns, and its memory
+   * requirements will be close to zero.
+   */
+  protected void clear(int k) {
+    for (int i = b(); --i >= 0;) {
+      this.buffers[i] = new DoubleBuffer(k);
+    }
+    this.nextTriggerCalculationState = true;
+  }
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @Override
+  public Object clone() {
+    DoubleBufferSet copy = (DoubleBufferSet) super.clone();
+
+    copy.buffers = copy.buffers.clone();
+    for (int i = buffers.length; --i >= 0;) {
+      copy.buffers[i] = (DoubleBuffer) copy.buffers[i].clone();
+    }
+    return copy;
+  }
+
+  /**
+   * Collapses the specified full buffers (must not include partial buffer).
+   *
+   * @param buffers the buffers to be collapsed (all of them must be full or partially full)
+   * @return a full buffer containing the collapsed values. The buffer has accumulated weight.
+   */
+  public DoubleBuffer collapse(DoubleBuffer[] buffers) {
+    //determine W
+    int W = 0;                //sum of all weights
+    for (DoubleBuffer buffer : buffers) {
+      W += buffer.weight();
+    }
+
+    //determine outputTriggerPositions
+    int k = this.k();
+    long[] triggerPositions = new long[k];
+    for (int j = 0; j < k; j++) {
+      triggerPositions[j] = this.nextTriggerPosition(j, W);
+    }
+
+    //do the main work: determine values at given positions in sorted sequence
+    double[] outputValues = this.getValuesAtPositions(buffers, triggerPositions);
+
+    //mark all full buffers as empty, except the first, which will contain the output
+    for (int b = 1; b < buffers.length; b++) {
+      buffers[b].clear();
+    }
+
+    DoubleBuffer outputBuffer = buffers[0];
+    outputBuffer.values.elements(outputValues);
+    outputBuffer.weight(W);
+
+    return outputBuffer;
+  }
+
+  /** Returns whether the specified element is contained in the receiver. */
+  public boolean contains(double element) {
+    for (int i = buffers.length; --i >= 0;) {
+      if ((!buffers[i].isEmpty()) && buffers[i].contains(element)) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Applies a procedure to each element of the receiver, if any. Iterates over the receiver in no particular order.
+   *
+   * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+   *                  continues.
+   */
+  public boolean forEach(DoubleProcedure procedure) {
+    for (int i = buffers.length; --i >= 0;) {
+      for (int w = buffers[i].weight(); --w >= 0;) {
+        if (!(buffers[i].values.forEach(procedure))) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Determines all values of the specified buffers positioned at the specified triggerPositions within the sorted
+   * sequence and fills them into outputValues.
+   *
+   * @param buffers          the buffers to be searched (all must be full or partial)
+   * @param triggerPositions the positions of elements within the sorted sequence to be retrieved
+   * @return outputValues a list filled with the values at triggerPositions
+   */
+  protected double[] getValuesAtPositions(DoubleBuffer[] buffers, long[] triggerPositions) {
+    //if (buffers.length==0)
+    //{
+    //  throw new IllegalArgumentException("Oops! buffer.length==0.");
+    //}
+
+    //log.info("triggers="+cern.it.util.Arrays.toString(positions));
+
+    //new DoubleArrayList(outputValues).fillFromToWith(0, outputValues.length-1, 0.0f);
+    //delte the above line, it is only for testing
+
+    //cern.it.util.Log.println("\nEntering getValuesAtPositions...");
+    //cern.it.util.Log.println("hitPositions="+cern.it.util.Arrays.toString(positions));
+
+    // sort buffers.
+    for (int i = buffers.length; --i >= 0;) {
+      buffers[i].sort();
+    }
+
+    // collect some infos into fast cache; for tuning purposes only.
+    int[] bufferSizes = new int[buffers.length];
+    double[][] bufferValues = new double[buffers.length][];
+    int totalBuffersSize = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      bufferSizes[i] = buffers[i].size();
+      bufferValues[i] = buffers[i].values.elements();
+      totalBuffersSize += bufferSizes[i];
+      //cern.it.util.Log.println("buffer["+i+"]="+buffers[i].values);
+    }
+
+    // prepare merge of equi-distant elements within buffers into output values
+
+    // first collect some infos into fast cache; for tuning purposes only.
+    int buffersSize = buffers.length;
+    int triggerPositionsLength = triggerPositions.length;
+
+    // now prepare the important things.
+    int j = 0;                 //current position in collapsed values
+    int[] cursors = new int[buffers.length];  //current position in each buffer; init with zeroes
+    long nextHit = triggerPositions[j];    //next position in sorted sequence to trigger output population
+    double[] outputValues = new double[triggerPositionsLength];
+
+    if (totalBuffersSize == 0) {
+      // nothing to output, because no elements have been filled (we are empty).
+      // return meaningless values
+      for (int i = 0; i < triggerPositions.length; i++) {
+        outputValues[i] = Double.NaN;
+      }
+      return outputValues;
+    }
+
+    // fill all output values with equi-distant elements.
+    long counter = 0;             //current position in sorted sequence
+    while (j < triggerPositionsLength) {
+      //log.info("\nj="+j);
+      //log.info("counter="+counter);
+      //log.info("nextHit="+nextHit);
+
+      // determine buffer with smallest value at cursor position.
+      double minValue = Double.POSITIVE_INFINITY;
+      int minBufferIndex = -1;
+
+      for (int b = buffersSize; --b >= 0;) {
+        //DoubleBuffer buffer = buffers[b];
+        //if (cursors[b] < buffer.length) {
+        if (cursors[b] < bufferSizes[b]) {
+          ///double value = buffer.values[cursors[b]];
+          double value = bufferValues[b][cursors[b]];
+          if (value <= minValue) {
+            minValue = value;
+            minBufferIndex = b;
+          }
+        }
+      }
+
+      DoubleBuffer minBuffer = buffers[minBufferIndex];
+
+      // trigger copies into output sequence, if necessary.
+      counter += minBuffer.weight();
+      while (counter > nextHit && j < triggerPositionsLength) {
+        outputValues[j++] = minValue;
+        //log.info("adding to output="+minValue);
+        if (j < triggerPositionsLength) {
+          nextHit = triggerPositions[j];
+        }
+      }
+
+
+      // that element has now been treated, move further.
+      cursors[minBufferIndex]++;
+      //log.info("cursors="+cern.it.util.Arrays.toString(cursors));
+
+    } //end while (j<k)
+
+    //cern.it.util.Log.println("returning output="+cern.it.util.Arrays.toString(outputValues));
+    return outputValues;
+  }
+
+  /** @return the number of elements within a buffer. */
+  public int k() {
+    return buffers[0].k;
+  }
+
+  /** Returns the number of elements currently needed to store all contained elements. */
+  public long memory() {
+    long memory = 0;
+    for (int i = buffers.length; --i >= 0;) {
+      memory += buffers[i].memory();
+    }
+    return memory;
+  }
+
+  /**
+   * Computes the next triggerPosition for collapse
+   *
+   * @param j specifies that the j-th trigger position is to be computed
+   * @param W the accumulated weights
+   * @return the next triggerPosition for collapse
+   */
+  protected long nextTriggerPosition(int j, long W) {
+    long nextTriggerPosition;
+
+    if (W % 2L != 0) { //is W odd?
+      nextTriggerPosition = j * W + (W + 1) / 2;
+    } else { //W is even
+      //alternate between both possible next hit positions upon successive invocations
+      if (nextTriggerCalculationState) {
+        nextTriggerPosition = j * W + W / 2;
+      } else {
+        nextTriggerPosition = j * W + (W + 2) / 2;
+      }
+    }
+
+    return nextTriggerPosition;
+  }
+
+  /**
+   * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
+   * interpolation if the element is not contained but lies in between two contained elements.
+   *
+   * @param element the element to search for.
+   * @return the percentage <tt>p</tt> of elements <tt>&lt;= element</tt> (<tt>0.0 &lt;= p &lt;=1.0)</tt>.
+   */
+  public double phi(double element) {
+    double elementsLessThanOrEqualToElement = 0.0;
+    for (int i = buffers.length; --i >= 0;) {
+      if (!buffers[i].isEmpty()) {
+        elementsLessThanOrEqualToElement += buffers[i].weight * buffers[i].rank(element);
+      }
+    }
+
+    return elementsLessThanOrEqualToElement / totalSize();
+  }
+
+  /** @return a String representation of the receiver */
+  public String toString() {
+    StringBuilder buf = new StringBuilder();
+    for (int b = 0; b < this.b(); b++) {
+      if (!buffers[b].isEmpty()) {
+        buf.append("buffer#").append(b).append(" = ");
+        buf.append(buffers[b].toString()).append('\n');
+      }
+    }
+    return buf.toString();
+  }
+
+  /** Returns the number of elements in all buffers. */
+  public long totalSize() {
+    DoubleBuffer[] fullBuffers = _getFullOrPartialBuffers();
+    long totalSize = 0;
+    for (int i = fullBuffers.length; --i >= 0;) {
+      totalSize += fullBuffers[i].size() * fullBuffers[i].weight();
+    }
+
+    return totalSize;
+  }
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleBufferSet.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileEstimator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileEstimator.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileEstimator.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileEstimator.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,268 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.PersistentObject;
+import org.apache.mahout.math.function.DoubleProcedure;
+import org.apache.mahout.math.list.DoubleArrayList;
+import org.apache.mahout.math.list.ObjectArrayList;
+
+/**
+ * The abstract base class for approximate quantile finders computing quantiles over a sequence of <tt>double</tt>
+ * elements.
+ */
+//abstract class ApproximateDoubleQuantileFinder extends Object implements DoubleQuantileFinder {
+abstract class DoubleQuantileEstimator extends PersistentObject
+    implements DoubleQuantileFinder {
+
+  protected DoubleBufferSet bufferSet;
+  protected DoubleBuffer currentBufferToFill;
+  protected int totalElementsFilled;
+
+  /** Makes this class non instantiable, but still let's others inherit from it. */
+  protected DoubleQuantileEstimator() {
+  }
+
+  /**
+   * Adds a value to the receiver.
+   *
+   * @param value the value to add.
+   */
+  @Override
+  public void add(double value) {
+    totalElementsFilled++;
+    if (!sampleNextElement()) {
+      return;
+    }
+
+    if (currentBufferToFill == null) {
+      if (bufferSet._getFirstEmptyBuffer() == null) {
+        collapse();
+      }
+      newBuffer();
+    }
+
+    currentBufferToFill.add(value);
+    if (currentBufferToFill.isFull()) {
+      currentBufferToFill = null;
+    }
+  }
+
+  /**
+   * Adds all values of the specified list to the receiver.
+   *
+   * @param values the list of which all values shall be added.
+   */
+  @Override
+  public void addAllOf(DoubleArrayList values) {
+    addAllOfFromTo(values, 0, values.size() - 1);
+  }
+
+  /**
+   * Adds the part of the specified list between indexes <tt>from</tt> (inclusive) and <tt>to</tt> (inclusive) to the
+   * receiver.
+   *
+   * @param values the list of which elements shall be added.
+   * @param from   the index of the first element to be added (inclusive).
+   * @param to     the index of the last element to be added (inclusive).
+   */
+  @Override
+  public void addAllOfFromTo(DoubleArrayList values, int from, int to) {
+
+    double[] valuesToAdd = values.elements();
+    int k = this.bufferSet.k();
+    int bufferSize = k;
+    double[] bufferValues = null;
+    if (currentBufferToFill != null) {
+      bufferValues = currentBufferToFill.values.elements();
+      bufferSize = currentBufferToFill.size();
+    }
+
+    for (int i = from - 1; ++i <= to;) {
+      if (sampleNextElement()) {
+        if (bufferSize == k) { // full
+          if (bufferSet._getFirstEmptyBuffer() == null) {
+            collapse();
+          }
+          newBuffer();
+          if (!currentBufferToFill.isAllocated) {
+            currentBufferToFill.allocate();
+          }
+          currentBufferToFill.isSorted = false;
+          bufferValues = currentBufferToFill.values.elements();
+          bufferSize = 0;
+        }
+
+        bufferValues[bufferSize++] = valuesToAdd[i];
+        if (bufferSize == k) { // full
+          currentBufferToFill.values.setSize(bufferSize);
+          currentBufferToFill = null;
+        }
+      }
+    }
+    if (this.currentBufferToFill != null) {
+      this.currentBufferToFill.values.setSize(bufferSize);
+    }
+
+    this.totalElementsFilled += to - from + 1;
+  }
+
+  /** Not yet commented. */
+  protected DoubleBuffer[] buffersToCollapse() {
+    int minLevel = bufferSet._getMinLevelOfFullOrPartialBuffers();
+    return bufferSet._getFullOrPartialBuffersWithLevel(minLevel);
+  }
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after this call returns, and its memory
+   * requirements will be close to zero.
+   */
+  @Override
+  public void clear() {
+    this.totalElementsFilled = 0;
+    this.currentBufferToFill = null;
+    this.bufferSet.clear();
+  }
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @Override
+  public Object clone() {
+    DoubleQuantileEstimator copy = (DoubleQuantileEstimator) super.clone();
+    if (this.bufferSet != null) {
+      copy.bufferSet = (DoubleBufferSet) copy.bufferSet.clone();
+      if (this.currentBufferToFill != null) {
+        int index = new ObjectArrayList(this.bufferSet.buffers).indexOf(this.currentBufferToFill, true);
+        copy.currentBufferToFill = copy.bufferSet.buffers[index];
+      }
+    }
+    return copy;
+  }
+
+  /** Not yet commented. */
+  protected void collapse() {
+    DoubleBuffer[] toCollapse = buffersToCollapse();
+    DoubleBuffer outputBuffer = bufferSet.collapse(toCollapse);
+
+    int minLevel = toCollapse[0].level();
+    outputBuffer.level(minLevel + 1);
+
+    postCollapse(toCollapse);
+  }
+
+  /** Returns whether the specified element is contained in the receiver. */
+  public boolean contains(double element) {
+    return bufferSet.contains(element);
+  }
+
+  /**
+   * Applies a procedure to each element of the receiver, if any. Iterates over the receiver in no particular order.
+   *
+   * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+   *                  continues.
+   * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise.
+   */
+  @Override
+  public boolean forEach(DoubleProcedure procedure) {
+    return this.bufferSet.forEach(procedure);
+  }
+
+  /**
+   * Returns the number of elements currently needed to store all contained elements. This number usually differs from
+   * the results of method <tt>size()</tt>, according to the underlying datastructure.
+   */
+  @Override
+  public long memory() {
+    return bufferSet.memory();
+  }
+
+  /** Not yet commented. */
+  protected abstract void newBuffer();
+
+  /**
+   * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
+   * interpolation if the element is not contained but lies in between two contained elements.
+   *
+   * @param element the element to search for.
+   * @return the percentage <tt>p</tt> of elements <tt>&lt;= element</tt> (<tt>0.0 &lt;= p &lt;=1.0)</tt>.
+   */
+  @Override
+  public double phi(double element) {
+    return bufferSet.phi(element);
+  }
+
+  /** Not yet commented. */
+  protected abstract void postCollapse(DoubleBuffer[] toCollapse);
+
+  /** Default implementation does nothing. */
+  protected DoubleArrayList preProcessPhis(DoubleArrayList phis) {
+    return phis;
+  }
+
+  /**
+   * Computes the specified quantile elements over the values previously added.
+   *
+   * @param phis the quantiles for which elements are to be computed. Each phi must be in the interval [0.0,1.0].
+   *             <tt>phis</tt> must be sorted ascending.
+   * @return the approximate quantile elements.
+   */
+  @Override
+  public DoubleArrayList quantileElements(DoubleArrayList phis) {
+
+    phis = preProcessPhis(phis);
+
+    long[] triggerPositions = new long[phis.size()];
+    long totalSize = this.bufferSet.totalSize();
+    for (int i = phis.size(); --i >= 0;) {
+      triggerPositions[i] = Utils.epsilonCeiling(phis.get(i) * totalSize) - 1;
+    }
+
+
+    DoubleBuffer[] fullBuffers = bufferSet._getFullOrPartialBuffers();
+    //double[] quantileElements = new double[phis.size()];
+
+    //do the main work: determine values at given positions in sorted sequence
+    return new DoubleArrayList(bufferSet.getValuesAtPositions(fullBuffers, triggerPositions));
+  }
+
+  /** Not yet commented. */
+  protected abstract boolean sampleNextElement();
+
+  /** Initializes the receiver */
+  protected void setUp(int b, int k) {
+    if (!(b >= 2 && k >= 1)) {
+      throw new IllegalArgumentException("Assertion: b>=2 && k>=1");
+    }
+    this.bufferSet = new DoubleBufferSet(b, k);
+    this.clear();
+  }
+
+  /**
+   * Returns the number of elements currently contained in the receiver (identical to the number of values added so
+   * far).
+   */
+  @Override
+  public long size() {
+    return totalElementsFilled;
+  }
+
+  /** Returns a String representation of the receiver. */
+  public String toString() {
+    String s = this.getClass().getName();
+    s = s.substring(s.lastIndexOf('.') + 1);
+    int b = bufferSet.b();
+    int k = bufferSet.k();
+    return s + "(mem=" + memory() + ", b=" + b + ", k=" + k + ", size=" + size() + ", totalSize=" +
+        this.bufferSet.totalSize() + ')';
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileEstimator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileFinder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileFinder.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileFinder.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileFinder.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,98 @@
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.function.DoubleProcedure;
+import org.apache.mahout.math.list.DoubleArrayList;
+
+/** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
+@Deprecated
+public interface DoubleQuantileFinder extends java.io.Serializable {
+  //public interface DoubleQuantileFinder extends com.objy.db.iapp.PersistentEvents, java.io.Serializable {
+
+  /**
+   * Adds a value to the receiver.
+   *
+   * @param value the value to add.
+   */
+  void add(double value);
+
+  /**
+   * Adds all values of the specified list to the receiver.
+   *
+   * @param values the list of which all values shall be added.
+   */
+  void addAllOf(DoubleArrayList values);
+
+  /**
+   * Adds the part of the specified list between indexes <tt>from</tt> (inclusive) and <tt>to</tt> (inclusive) to the
+   * receiver.
+   *
+   * @param values the list of which elements shall be added.
+   * @param from   the index of the first element to be added (inclusive).
+   * @param to     the index of the last element to be added (inclusive).
+   */
+  void addAllOfFromTo(DoubleArrayList values, int from, int to);
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after this call returns, and its memory
+   * requirements will be close to zero.
+   */
+  void clear();
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  Object clone();
+
+  /**
+   * Applies a procedure to each element of the receiver, if any. Iterates over the receiver in no particular order.
+   *
+   * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+   *                  continues.
+   * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise.
+   */
+  boolean forEach(DoubleProcedure procedure);
+
+  /**
+   * Returns the number of elements currently needed to store all contained elements. This number usually differs from
+   * the results of method <tt>size()</tt>, according to the underlying datastructure.
+   */
+  long memory();
+
+  /**
+   * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
+   * interpolation if the element is not contained but lies in between two contained elements.
+   *
+   * Writing a wrapper is a good idea if you can think of better ways of doing interpolation. Same if you want to keep
+   * min,max and other such measures.
+   *
+   * @param element the element to search for.
+   * @return the percentage <tt>p</tt> of elements <tt>&lt;= element</tt> (<tt>0.0 &lt;= p &lt;=1.0)</tt>.
+   */
+  double phi(double element);
+
+  /**
+   * Computes the specified quantile elements over the values previously added.
+   *
+   * @param phis the quantiles for which elements are to be computed. Each phi must be in the interval [0.0,1.0].
+   *             <tt>phis</tt> must be sorted ascending.
+   * @return the quantile elements.
+   */
+  DoubleArrayList quantileElements(DoubleArrayList phis);
+
+  /**
+   * Returns the number of elements currently contained in the receiver (identical to the number of values added so
+   * far).
+   */
+  long size();
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/DoubleQuantileFinder.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/EquiDepthHistogram.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/EquiDepthHistogram.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/EquiDepthHistogram.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/EquiDepthHistogram.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,137 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.PersistentObject;
+
+/**
+ * Read-only equi-depth histogram for selectivity estimation.
+ * Assume you have collected statistics over a data set, among them a one-dimensional equi-depth histogram (quantiles).
+ * Then an applications or DBMS might want to estimate the <i>selectivity</i> of some range query <tt>[from,to]</tt>, i.e. the percentage of data set elements contained in the query range.
+ * This class does not collect equi-depth histograms but only space efficiently stores already produced histograms and provides operations for selectivity estimation.
+ * Uses linear interpolation.
+ * <p>
+ * This class stores a list <tt>l</tt> of <tt>float</tt> values for which holds:
+ * <li>Let <tt>v</tt> be a list of values (sorted ascending) an equi-depth histogram has been computed over.</li>
+ * <li>Let <tt>s=l.length</tt>.</li>
+ * <li>Let <tt>p=(0, 1/s-1), 2/s-1,..., s-1/s-1=1.0)</tt> be a list of the <tt>s</tt> percentages.</li>
+ * <li>Then for each <tt>i=0..s-1: l[i] = e : v.contains(e) && v[0],..., v[p[i]*v.length] &lt;= e</tt>.</li>
+ * <li>(In particular: <tt>l[0]=min(v)=v[0]</tt> and <tt>l[s-1]=max(v)=v[s-1]</tt>.)</li>
+ *
+ */
+
+/** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
+@Deprecated
+public class EquiDepthHistogram extends PersistentObject {
+
+  private final float[] binBoundaries;
+
+  /**
+   * Constructs an equi-depth histogram with the given quantile elements. Quantile elements must be sorted ascending and
+   * have the form specified in the class documentation.
+   */
+  public EquiDepthHistogram(float[] quantileElements) {
+    this.binBoundaries = quantileElements;
+  }
+
+  /**
+   * Returns the bin index of the given element. In other words, returns a handle to the range the element falls into.
+   *
+   * @param element the element to search for.
+   * @throws java.lang.IllegalArgumentException
+   *          if the element is not contained in any bin.
+   */
+  public int binOfElement(float element) {
+    int index = java.util.Arrays.binarySearch(binBoundaries, element);
+    if (index >= 0) {
+      // element found.
+      if (index == binBoundaries.length - 1) {
+        index--;
+      } // last bin is a closed interval.
+    } else {
+      // element not found.
+      index -= -1; // index = -index-1; now index is the insertion point.
+      if (index == 0 || index == binBoundaries.length) {
+        throw new IllegalArgumentException("Element=" + element + " not contained in any bin.");
+      }
+      index--;
+    }
+    return index;
+  }
+
+  /** Returns the number of bins. In other words, returns the number of subdomains partitioning the entire value domain. */
+  public int bins() {
+    return binBoundaries.length - 1;
+  }
+
+  /**
+   * Returns the end of the range associated with the given bin.
+   *
+   * @throws ArrayIndexOutOfBoundsException if <tt>binIndex &lt; 0 || binIndex &gt;= bins()</tt>.
+   */
+  public float endOfBin(int binIndex) {
+    return binBoundaries[binIndex + 1];
+  }
+
+  /**
+   * Returns the percentage of elements in the range (from,to]. Does linear interpolation.
+   *
+   * @param from the start point (exclusive).
+   * @param to   the end point (inclusive).
+   */
+  public double percentFromTo(float from, float to) {
+    return phi(to) - phi(from);
+  }
+
+  /**
+   * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
+   * interpolation.
+   *
+   * @param element the element to search for.
+   */
+  public double phi(float element) {
+    int size = binBoundaries.length;
+    if (element <= binBoundaries[0]) {
+      return 0.0;
+    }
+    if (element >= binBoundaries[size - 1]) {
+      return 1.0;
+    }
+
+    double binWidth = 1.0 / (size - 1);
+    int index = java.util.Arrays.binarySearch(binBoundaries, element);
+    //int index = new FloatArrayList(binBoundaries).binarySearch(element);
+    if (index >= 0) { // found
+      return binWidth * index;
+    }
+
+    // do linear interpolation
+    int insertionPoint = -index - 1;
+    double from = binBoundaries[insertionPoint - 1];
+    double to = binBoundaries[insertionPoint] - from;
+    double p = (element - from) / to;
+    return binWidth * (p + (insertionPoint - 1));
+  }
+
+  /** @deprecated Deprecated. Returns the number of bin boundaries. */
+  @Deprecated
+  public int size() {
+    return binBoundaries.length;
+  }
+
+  /**
+   * Returns the start of the range associated with the given bin.
+   *
+   * @throws ArrayIndexOutOfBoundsException if <tt>binIndex &lt; 0 || binIndex &gt;= bins()</tt>.
+   */
+  public float startOfBin(int binIndex) {
+    return binBoundaries[binIndex];
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/EquiDepthHistogram.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/ExactDoubleQuantileFinder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/ExactDoubleQuantileFinder.java?rev=891983&view=auto
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/ExactDoubleQuantileFinder.java (added)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/ExactDoubleQuantileFinder.java Thu Dec 17 23:22:16 2009
@@ -0,0 +1,180 @@
+/*
+Copyright � 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear in all copies and 
+that both that copyright notice and this permission notice appear in supporting documentation. 
+CERN makes no representations about the suitability of this software for any purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math.jet.stat.quantile;
+
+import org.apache.mahout.math.PersistentObject;
+import org.apache.mahout.math.function.DoubleProcedure;
+import org.apache.mahout.math.list.DoubleArrayList;
+
+/**
+ * Exact quantile finding algorithm for known and unknown <tt>N</tt> requiring large main memory; computes quantiles
+ * over a sequence of <tt>double</tt> elements. The folkore algorithm: Keeps all elements in main memory, sorts the
+ * list, then picks the quantiles.
+ */
+//class ExactDoubleQuantileFinder extends Object implements DoubleQuantileFinder {
+class ExactDoubleQuantileFinder extends PersistentObject implements DoubleQuantileFinder {
+
+  private DoubleArrayList buffer;
+  private boolean isSorted;
+
+  /** Constructs an empty exact quantile finder. */
+  ExactDoubleQuantileFinder() {
+    this.buffer = new DoubleArrayList(0);
+    this.clear();
+  }
+
+  /**
+   * Adds a value to the receiver.
+   *
+   * @param value the value to add.
+   */
+  @Override
+  public void add(double value) {
+    this.buffer.add(value);
+    this.isSorted = false;
+  }
+
+  /**
+   * Adds all values of the specified list to the receiver.
+   *
+   * @param values the list of which all values shall be added.
+   */
+  @Override
+  public void addAllOf(DoubleArrayList values) {
+    addAllOfFromTo(values, 0, values.size() - 1);
+  }
+
+  /**
+   * Adds the part of the specified list between indexes <tt>from</tt> (inclusive) and <tt>to</tt> (inclusive) to the
+   * receiver.
+   *
+   * @param values the list of which elements shall be added.
+   * @param from   the index of the first element to be added (inclusive).
+   * @param to     the index of the last element to be added (inclusive).
+   */
+  @Override
+  public void addAllOfFromTo(DoubleArrayList values, int from, int to) {
+    buffer.addAllOfFromTo(values, from, to);
+    this.isSorted = false;
+  }
+
+  /**
+   * Removes all elements from the receiver.  The receiver will be empty after this call returns, and its memory
+   * requirements will be close to zero.
+   */
+  @Override
+  public void clear() {
+    this.buffer.clear();
+    this.buffer.trimToSize();
+    this.isSorted = false;
+  }
+
+  /**
+   * Returns a deep copy of the receiver.
+   *
+   * @return a deep copy of the receiver.
+   */
+  @Override
+  public Object clone() {
+    ExactDoubleQuantileFinder copy = (ExactDoubleQuantileFinder) super.clone();
+    if (this.buffer != null) {
+      copy.buffer = copy.buffer.copy();
+    }
+    return copy;
+  }
+
+  /** Returns whether the specified element is contained in the receiver. */
+  public boolean contains(double element) {
+    this.sort();
+    return buffer.binarySearch(element) >= 0;
+  }
+
+  /**
+   * Applies a procedure to each element of the receiver, if any. Iterates over the receiver in no particular order.
+   *
+   * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
+   *                  continues.
+   * @return <tt>false</tt> if the procedure stopped before all elements where iterated over, <tt>true</tt> otherwise.
+   */
+  @Override
+  public boolean forEach(DoubleProcedure procedure) {
+    double[] theElements = buffer.elements();
+    int theSize = (int) size();
+
+    for (int i = 0; i < theSize;) {
+      if (!procedure.apply(theElements[i++])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the number of elements currently needed to store all contained elements. This number usually differs from
+   * the results of method <tt>size()</tt>, according to the underlying datastructure.
+   */
+  @Override
+  public long memory() {
+    return buffer.elements().length;
+  }
+
+  /**
+   * Returns how many percent of the elements contained in the receiver are <tt>&lt;= element</tt>. Does linear
+   * interpolation if the element is not contained but lies in between two contained elements.
+   *
+   * @param element the element to search for.
+   * @return the percentage <tt>p</tt> of elements <tt>&lt;= element</tt> (<tt>0.0 &lt;= p &lt;=1.0)</tt>.
+   */
+  @Override
+  public double phi(double element) {
+    this.sort();
+    return org.apache.mahout.math.jet.stat.Descriptive.rankInterpolated(buffer, element) / this.size();
+  }
+
+  /**
+   * Computes the specified quantile elements over the values previously added.
+   *
+   * @param phis the quantiles for which elements are to be computed. Each phi must be in the interval [0.0,1.0].
+   *             <tt>phis</tt> must be sorted ascending.
+   * @return the exact quantile elements.
+   */
+  @Override
+  public DoubleArrayList quantileElements(DoubleArrayList phis) {
+    this.sort();
+    return org.apache.mahout.math.jet.stat.Descriptive.quantiles(this.buffer, phis);
+  }
+
+  /**
+   * Returns the number of elements currently contained in the receiver (identical to the number of values added so
+   * far).
+   */
+  @Override
+  public long size() {
+    return buffer.size();
+  }
+
+  /** Sorts the receiver. */
+  protected void sort() {
+    if (!isSorted) {
+      // IMPORTANT: TO DO : replace mergeSort with quickSort!
+      // currently it is mergeSort because JDK 1.2 can't be imported into VisualAge.
+      buffer.sort();
+      //this.buffer.mergeSort();
+      this.isSorted = true;
+    }
+  }
+
+  /** Returns a String representation of the receiver. */
+  public String toString() {
+    String s = this.getClass().getName();
+    s = s.substring(s.lastIndexOf('.') + 1);
+    return s + "(mem=" + memory() + ", size=" + size() + ')';
+  }
+
+}

Propchange: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/jet/stat/quantile/ExactDoubleQuantileFinder.java
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message