commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sebb <seb...@gmail.com>
Subject Re: [math] Added Cheng sampling procedure for beta distribution.
Date Wed, 17 Dec 2014 10:32:47 GMT
It would be helpful to add such info to the Commons Git docs

On 17 December 2014 at 07:02, Thomas Neidhart <thomas.neidhart@gmail.com> wrote:
> On 12/17/2014 05:10 AM, Phil Steitz wrote:
>> I am lost here.  Sorry I am still git-newbie.  I see no subsequent
>> commit message showing this change has been backed out, but it looks
>> to me like HEAD has it reverted.  Is that right?  If so, why no
>> subsequent notification?  And how can I get and re-apply the changes
>> below so I can do some testing?
>
> the commit was done to a new branch: MATH-1153
>
> when you type the following command you will see all remote branches:
>
>   git branch -r
>
> to switch your local environment to this branch do the following
>
>   git fetch     # update all information
>   git checkout -b MATH-1153 origin/MATH-1153
>
> your working copy should have been switched to the new branch. To verify
> this you can type in
>
>   git branch
>
> and should see something like this:
>
>   master
> * MATH-1153
>
> the asterisk means this branch is currently checked out.
>
>
> To switch back to the main branch type in
>
>   git checkout master
>
> Thomas
>
>>
>> Also, re comments else-thread, does the Chi-Square test in
>> RealDistributionAbstractTest succeed regularly and which of the
>> tests below show sensitivity to PRNG seed?  Both?  One thing to look
>> at is chi-square tests using the TestUtils impl with a lot of bins
>> and healthy sample size.  Generally, mistakes in sampling
>> implementations will show systematic problems in certain bins and
>> the TestUtils impl will report where they are.
>>
>> Phil
>>
>> On 12/16/14 1:49 PM, luc@apache.org wrote:
>>> Repository: commons-math
>>> Updated Branches:
>>>   refs/heads/MATH-1153 [created] 9ba0a1cad
>>>
>>>
>>> Added Cheng sampling procedure for beta distribution.
>>>
>>> Thanks to Sergei Lebedev for the patch.
>>>
>>> JIRA: MATH-1153
>>>
>>> Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo
>>> Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/9ba0a1ca
>>> Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/9ba0a1ca
>>> Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/9ba0a1ca
>>>
>>> Branch: refs/heads/MATH-1153
>>> Commit: 9ba0a1cadf37b0d1c68b01c706a10a042e19e1f6
>>> Parents: d9b951c
>>> Author: Luc Maisonobe <luc@apache.org>
>>> Authored: Tue Dec 16 21:48:44 2014 +0100
>>> Committer: Luc Maisonobe <luc@apache.org>
>>> Committed: Tue Dec 16 21:48:44 2014 +0100
>>>
>>> ----------------------------------------------------------------------
>>>  pom.xml                                         |   3 +
>>>  .../math3/distribution/BetaDistribution.java    | 139 +++++++++++++++----
>>>  .../distribution/BetaDistributionTest.java      |  74 ++++++++++
>>>  3 files changed, 192 insertions(+), 24 deletions(-)
>>> ----------------------------------------------------------------------
>>>
>>>
>>> http://git-wip-us.apache.org/repos/asf/commons-math/blob/9ba0a1ca/pom.xml
>>> ----------------------------------------------------------------------
>>> diff --git a/pom.xml b/pom.xml
>>> index 3ee5db2..667d36e 100644
>>> --- a/pom.xml
>>> +++ b/pom.xml
>>> @@ -252,6 +252,9 @@
>>>        <name>Piotr Kochanski</name>
>>>      </contributor>
>>>      <contributor>
>>> +      <name>Sergei Lebedev</name>
>>> +    </contributor>
>>> +    <contributor>
>>>        <name>Bob MacCallum</name>
>>>      </contributor>
>>>      <contributor>
>>>
>>> http://git-wip-us.apache.org/repos/asf/commons-math/blob/9ba0a1ca/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java
>>> ----------------------------------------------------------------------
>>> diff --git a/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java
b/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java
>>> index 3f62f64..458fe23 100644
>>> --- a/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java
>>> +++ b/src/main/java/org/apache/commons/math3/distribution/BetaDistribution.java
>>> @@ -23,6 +23,7 @@ import org.apache.commons.math3.random.Well19937c;
>>>  import org.apache.commons.math3.special.Beta;
>>>  import org.apache.commons.math3.special.Gamma;
>>>  import org.apache.commons.math3.util.FastMath;
>>> +import org.apache.commons.math3.util.Precision;
>>>
>>>  /**
>>>   * Implements the Beta distribution.
>>> @@ -34,10 +35,12 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>      /**
>>>       * Default inverse cumulative probability accuracy.
>>>       * @since 2.1
>>> +     * @deprecated as of 3.4, this parameter is not used anymore
>>>       */
>>> +    @Deprecated
>>>      public static final double DEFAULT_INVERSE_ABSOLUTE_ACCURACY = 1e-9;
>>>      /** Serializable version identifier. */
>>> -    private static final long serialVersionUID = -1221965979403477668L;
>>> +    private static final long serialVersionUID = 20141216L;
>>>      /** First shape parameter. */
>>>      private final double alpha;
>>>      /** Second shape parameter. */
>>> @@ -46,8 +49,6 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>       * updated whenever alpha or beta are changed.
>>>       */
>>>      private double z;
>>> -    /** Inverse cumulative probability accuracy. */
>>> -    private final double solverAbsoluteAccuracy;
>>>
>>>      /**
>>>       * Build a new instance.
>>> @@ -63,7 +64,7 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>       * @param beta Second shape parameter (must be positive).
>>>       */
>>>      public BetaDistribution(double alpha, double beta) {
>>> -        this(alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
>>> +        this(new Well19937c(), alpha, beta);
>>>      }
>>>
>>>      /**
>>> @@ -82,9 +83,11 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>       * cumulative probability estimates (defaults to
>>>       * {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}).
>>>       * @since 2.1
>>> +     * @deprecated as of 3.4, the inverse cumulative accuracy is not used anymore
>>>       */
>>> +    @Deprecated
>>>      public BetaDistribution(double alpha, double beta, double inverseCumAccuracy)
{
>>> -        this(new Well19937c(), alpha, beta, inverseCumAccuracy);
>>> +        this(alpha, beta);
>>>      }
>>>
>>>      /**
>>> @@ -96,7 +99,11 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>       * @since 3.3
>>>       */
>>>      public BetaDistribution(RandomGenerator rng, double alpha, double beta)
{
>>> -        this(rng, alpha, beta, DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
>>> +        super(rng);
>>> +
>>> +        this.alpha = alpha;
>>> +        this.beta = beta;
>>> +        z = Double.NaN;
>>>      }
>>>
>>>      /**
>>> @@ -109,17 +116,14 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>       * cumulative probability estimates (defaults to
>>>       * {@link #DEFAULT_INVERSE_ABSOLUTE_ACCURACY}).
>>>       * @since 3.1
>>> +     * @deprecated as of 3.4, the inverse cumulative accuracy is not used anymore
>>>       */
>>> +    @Deprecated
>>>      public BetaDistribution(RandomGenerator rng,
>>>                              double alpha,
>>>                              double beta,
>>>                              double inverseCumAccuracy) {
>>> -        super(rng);
>>> -
>>> -        this.alpha = alpha;
>>> -        this.beta = beta;
>>> -        z = Double.NaN;
>>> -        solverAbsoluteAccuracy = inverseCumAccuracy;
>>> +        this(rng, alpha, beta);
>>>      }
>>>
>>>      /**
>>> @@ -188,18 +192,6 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>      }
>>>
>>>      /**
>>> -     * Return the absolute accuracy setting of the solver used to estimate
>>> -     * inverse cumulative probabilities.
>>> -     *
>>> -     * @return the solver absolute accuracy.
>>> -     * @since 2.1
>>> -     */
>>> -    @Override
>>> -    protected double getSolverAbsoluteAccuracy() {
>>> -        return solverAbsoluteAccuracy;
>>> -    }
>>> -
>>> -    /**
>>>       * {@inheritDoc}
>>>       *
>>>       * For first shape parameter {@code alpha} and second shape parameter
>>> @@ -266,4 +258,103 @@ public class BetaDistribution extends AbstractRealDistribution
{
>>>      public boolean isSupportConnected() {
>>>          return true;
>>>      }
>>> +
>>> +    /** {@inheritDoc}
>>> +     * <p>
>>> +     * Sampling is performed using Cheng algorithms:
>>> +     * </p>
>>> +     * <p>
>>> +     * R. C. H. Cheng, "Generating beta variates with nonintegral shape parameters.".
>>> +     *                 Communications of the ACM, 21, 317–322, 1978.
>>> +     * </p>
>>> +     */
>>> +    @Override
>>> +    public double sample() {
>>> +        if (FastMath.min(alpha, beta) > 1) {
>>> +            return algorithmBB();
>>> +        } else {
>>> +            return algorithmBC();
>>> +        }
>>> +    }
>>> +
>>> +    /** Returns one sample using Cheng's BB algorithm, when both &alpha;
and &beta; are greater than 1.
>>> +     * @return sampled value
>>> +     */
>>> +    private double algorithmBB() {
>>> +        final double a = FastMath.min(alpha, beta);
>>> +        final double b = FastMath.max(alpha, beta);
>>> +        final double newAlpha = a + b;
>>> +        final double newBeta = FastMath.sqrt((newAlpha - 2.) / (2. * a * b -
newAlpha));
>>> +        final double gamma = a + 1. / newBeta;
>>> +
>>> +        double r;
>>> +        double w;
>>> +        double t;
>>> +        do {
>>> +            final double u1 = random.nextDouble();
>>> +            final double u2 = random.nextDouble();
>>> +            final double v = newBeta * FastMath.log(u1 / (1. - u1));
>>> +            w = a * FastMath.exp(v);
>>> +            final double newZ = u1 * u1 * u2;
>>> +            r = gamma * v - 1.3862944;
>>> +            final double s = a + r - w;
>>> +            if (s + 2.609438 >= 5 * newZ) {
>>> +                break;
>>> +            }
>>> +
>>> +            t = FastMath.log(newZ);
>>> +            if (s >= t) {
>>> +                break;
>>> +            }
>>> +        } while (r + newAlpha * FastMath.log(newAlpha / (b + w)) < t);
>>> +
>>> +        w = FastMath.min(w, Double.MAX_VALUE);
>>> +        return Precision.equals(a, alpha) ? w / (b + w) : b / (b + w);
>>> +    }
>>> +
>>> +    /** Returns one sample using Cheng's BC algorithm, when at least one of
&alpha; and &beta; is smaller than 1.
>>> +     * @return sampled value
>>> +     */
>>> +    private double algorithmBC() {
>>> +        final double a = FastMath.max(alpha, beta);
>>> +        final double b = FastMath.min(alpha, beta);
>>> +        final double newAlpha = a + b;
>>> +        final double newBeta = 1. / b;
>>> +        final double delta = 1. + a - b;
>>> +        final double k1 = delta * (0.0138889 + 0.0416667 * b) / (a * newBeta
- 0.777778);
>>> +        final double k2 = 0.25 + (0.5 + 0.25 / delta) * b;
>>> +
>>> +        double w;
>>> +        for (;;) {
>>> +            final double u1 = random.nextDouble();
>>> +            final double u2 = random.nextDouble();
>>> +            final double y = u1 * u2;
>>> +            final double newZ = u1 * y;
>>> +            if (u1 < 0.5) {
>>> +                if (0.25 * u2 + newZ - y >= k1) {
>>> +                    continue;
>>> +                }
>>> +            } else {
>>> +                if (newZ <= 0.25) {
>>> +                    final double v = newBeta * FastMath.log(u1 / (1. - u1));
>>> +                    w = a * FastMath.exp(v);
>>> +                    break;
>>> +                }
>>> +
>>> +                if (newZ >= k2) {
>>> +                    continue;
>>> +                }
>>> +            }
>>> +
>>> +            final double v = newBeta * FastMath.log(u1 / (1. - u1));
>>> +            w = a * FastMath.exp(v);
>>> +            if (newAlpha * (FastMath.log(newAlpha / (b + w)) + v) - 1.3862944
>= FastMath.log(newZ)) {
>>> +                break;
>>> +            }
>>> +        }
>>> +
>>> +        w = FastMath.min(w, Double.MAX_VALUE);
>>> +        return Precision.equals(a, alpha) ? w / (b + w) : b / (b + w);
>>> +    }
>>> +
>>>  }
>>>
>>> http://git-wip-us.apache.org/repos/asf/commons-math/blob/9ba0a1ca/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java
>>> ----------------------------------------------------------------------
>>> diff --git a/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java
b/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java
>>> index 217ae66..d26c6f0 100644
>>> --- a/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java
>>> +++ b/src/test/java/org/apache/commons/math3/distribution/BetaDistributionTest.java
>>> @@ -16,10 +16,22 @@
>>>   */
>>>  package org.apache.commons.math3.distribution;
>>>
>>> +import java.util.Arrays;
>>> +
>>> +import org.apache.commons.math3.random.RandomGenerator;
>>> +import org.apache.commons.math3.random.Well1024a;
>>> +import org.apache.commons.math3.random.Well19937a;
>>> +import org.apache.commons.math3.stat.StatUtils;
>>> +import org.apache.commons.math3.stat.inference.KolmogorovSmirnovTest;
>>> +import org.apache.commons.math3.stat.inference.TestUtils;
>>>  import org.junit.Assert;
>>>  import org.junit.Test;
>>>
>>>  public class BetaDistributionTest {
>>> +
>>> +    static final double[] alphaBetas = {0.1, 1, 10, 100, 1000};
>>> +    static final double epsilon = StatUtils.min(alphaBetas);
>>> +
>>>      @Test
>>>      public void testCumulative() {
>>>          double[] x = new double[]{-0.1, 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
0.8, 0.9, 1.0, 1.1};
>>> @@ -303,4 +315,66 @@ public class BetaDistributionTest {
>>>          Assert.assertEquals(dist.getNumericalMean(), 2.0 / 7.0, tol);
>>>          Assert.assertEquals(dist.getNumericalVariance(), 10.0 / (49.0 * 8.0),
tol);
>>>      }
>>> +
>>> +    @Test
>>> +    public void testMomentsSampling() {
>>> +        RandomGenerator random = new Well1024a(0x7829862c82fec2dal);
>>> +        final int numSamples = 1000;
>>> +        for (final double alpha : alphaBetas) {
>>> +            for (final double beta : alphaBetas) {
>>> +                final BetaDistribution betaDistribution = new BetaDistribution(random,
alpha, beta);
>>> +                final double[] observed = new BetaDistribution(alpha, beta).sample(numSamples);
>>> +                Arrays.sort(observed);
>>> +
>>> +                final String distribution = String.format("Beta(%.2f, %.2f)",
alpha, beta);
>>> +                Assert.assertEquals(String.format("E[%s]", distribution),
>>> +                                    betaDistribution.getNumericalMean(),
>>> +                                    StatUtils.mean(observed), epsilon);
>>> +                Assert.assertEquals(String.format("Var[%s]", distribution),
>>> +                                    betaDistribution.getNumericalVariance(),
>>> +                                    StatUtils.variance(observed), epsilon);
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    @Test
>>> +    public void testGoodnessOfFit() {
>>> +        RandomGenerator random = new Well19937a(0x237db1db907b089fl);
>>> +        final int numSamples = 1000;
>>> +        final double level = 0.01;
>>> +        for (final double alpha : alphaBetas) {
>>> +            for (final double beta : alphaBetas) {
>>> +                final BetaDistribution betaDistribution = new BetaDistribution(random,
alpha, beta);
>>> +                final double[] observed = betaDistribution.sample(numSamples);
>>> +                Assert.assertFalse("G goodness-of-fit test rejected null at
alpha = " + level,
>>> +                                   gTest(betaDistribution, observed) < level);
>>> +                Assert.assertFalse("KS goodness-of-fit test rejected null at
alpha = " + level,
>>> +                                   new KolmogorovSmirnovTest(random).kolmogorovSmirnovTest(betaDistribution,
observed) < level);
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    private double gTest(final RealDistribution expectedDistribution, final
double[] values) {
>>> +        final int numBins = values.length / 30;
>>> +        final double[] breaks = new double[numBins];
>>> +        for (int b = 0; b < breaks.length; b++) {
>>> +            breaks[b] = expectedDistribution.inverseCumulativeProbability((double)
b / numBins);
>>> +        }
>>> +
>>> +        final long[] observed = new long[numBins];
>>> +        for (final double value : values) {
>>> +            int b = 0;
>>> +            do {
>>> +                b++;
>>> +            } while (b < numBins && value >= breaks[b]);
>>> +
>>> +            observed[b - 1]++;
>>> +        }
>>> +
>>> +        final double[] expected = new double[numBins];
>>> +        Arrays.fill(expected, (double) values.length / numBins);
>>> +
>>> +        return TestUtils.gTest(expected, observed);
>>> +    }
>>> +
>>>  }
>>>
>>>
>>
>>
>>
>> ---------------------------------------------------------------------
>> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
>> For additional commands, e-mail: dev-help@commons.apache.org
>>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
> For additional commands, e-mail: dev-help@commons.apache.org
>

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org


Mime
View raw message