Return-Path:
Represents an
* empirical probability distribution -- a probability distribution derived
* from observed data without making any assumptions about the functional form
- * of the population distribution that the data come from.
- * An EmpiricalDistribution
maintains data structures, called
+ * of the population distribution that the data come from.
+ * An EmpiricalDistribution
maintains data structures, called
* distribution digests, that describe empirical distributions and
* support the following operations:
*
* The implementation uses what amounts to the
* Variable Kernel Method with Gaussian smoothing:
@@ -71,7 +77,18 @@ import org.apache.commons.math3.util.Mat *
+ * bin and std dev = std dev of associated bin.
+ *
+ * EmpiricalDistribution implements the {@link RealDistribution} interface
+ * as follows. Given x within the range of values in the dataset, let B
+ * be the bin containing x and let K be the within-bin kernel for B. Let P(B-)
+ * be the sum of the probabilities of the bins below B and let K(B) be the
+ * mass of B under K (i.e., the integral of the kernel density over B). Then
+ * set P(X < x) = P(B-) + P(B) * K(x) / K(B) where K(x) is the kernel distribution
+ * evaluated at x. This results in a cdf that matches the grouped frequency
+ * distribution at the bin endpoints and interpolates within bins using
+ * within-bin kernels.
+ *
* USAGE NOTES:
* The binCount
is set by default to 1000. A good rule of thumb
* is to set the bin count to approximately the length of the input file divided
@@ -82,7 +99,7 @@ import org.apache.commons.math3.util.Mat
*
* @version $Id$
*/
-public class EmpiricalDistribution implements Serializable {
+public class EmpiricalDistribution extends AbstractRealDistribution {
/** Default bin count */
public static final int DEFAULT_BIN_COUNT = 1000;
@@ -192,16 +209,16 @@ public class EmpiricalDistribution imple
*
* @param in the input data array
* @exception NullArgumentException if in is null
- * @throws MathIllegalStateException if an IOException occurs
*/
- public void load(double[] in) throws NullArgumentException, MathIllegalStateException {
+ public void load(double[] in) throws NullArgumentException {
DataAdapter da = new ArrayDataAdapter(in);
try {
da.computeStats();
// new adapter for the second pass
fillBinStats(new ArrayDataAdapter(in));
- } catch (IOException e) {
- throw new MathIllegalStateException(e, LocalizedFormats.SIMPLE_MESSAGE, e.getLocalizedMessage());
+ } catch (IOException ex) {
+ // Can't happen
+ throw new MathInternalError();
}
loaded = true;
@@ -213,7 +230,7 @@ public class EmpiricalDistribution imple
* The input file must be an ASCII text file containing one
* valid numeric entry per line.
*
- * @param url url of the input file
+ * @param url url of the input file
*
* @throws IOException if an IO error occurs
* @throws NullArgumentException if url is null
@@ -429,9 +446,9 @@ public class EmpiricalDistribution imple
*/
private int findBin(double value) {
return FastMath.min(
- FastMath.max((int) FastMath.ceil((value- min) / delta) - 1, 0),
+ FastMath.max((int) FastMath.ceil((value - min) / delta) - 1, 0),
binCount - 1);
- }
+ }

/**
* Generates a random value from this distribution.
@@ -513,9 +530,8 @@ public class EmpiricalDistribution imple
*/
public double[] getUpperBounds() {
double[] binUpperBounds = new double[binCount];
- binUpperBounds[0] = min + delta;
- for (int i = 1; i < binCount - 1; i++) {
- binUpperBounds[i] = binUpperBounds[i-1] + delta;
+ for (int i = 0; i < binCount - 1; i++) {
+ binUpperBounds[i] = min + delta * (i + 1);
}
binUpperBounds[binCount - 1] = max;
return binUpperBounds;
@@ -557,4 +573,263 @@ public class EmpiricalDistribution imple
public void reSeed(long seed) {
randomData.reSeed(seed);
}
+
+ // Distribution methods ---------------------------
+
+ /**
+ * {@inheritDoc}
+ * @since 3.1
+ */
+ public double probability(double x) {
+ return 0;
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * Returns the kernel density normalized so that its integral over each bin
+ * equals the bin mass.
+ *
+ * Algorithm description:
Algorithm description:
Algorithm description: