commons-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Maciej Kwidziński (JIRA) <j...@apache.org>
Subject [jira] [Commented] (MATH-1462) EmpiricalDistribution:inverseCumulativeProbability return Infinity
Date Mon, 23 Jul 2018 16:08:00 GMT

    [ https://issues.apache.org/jira/browse/MATH-1462?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16553036#comment-16553036
] 

Maciej Kwidziński commented on MATH-1462:
-----------------------------------------

Correct, the MATH-1431 fix was not good enough.
I can't directly link to an actual executable test, because our Git repo is private. I'll
inline key classes in the comment. It's in Kotlin, but I hope it's readable enough.

{code}
/**
 * Samples the [quantile function](https://en.wikipedia.org/wiki/Quantile_function) of a data set.
 */
class QuantileFunction {

    /**
     * Evaluates the quantile function of [data] at the cumulative probabilities 0.00, 0.01, ..., 1.00.
     *
     * @param data the observations; each one is converted to a `Double` before use
     * @return one [Quantile] per probability, in ascending probability order,
     * or an empty list when [data] is empty
     */
    fun plot(
        data: Collection<Number>
    ): List<Quantile> {
        if (data.isEmpty()) return emptyList()

        val samples = data.map(Number::toDouble).toDoubleArray()
        val distribution = RoughEmpiricalDistribution(binCount = 1000, data = samples)
        return (0..100).map { percentile ->
            val probability = percentile.toDouble() / 100
            Quantile(
                cumulativeProbability = probability,
                value = distribution.inverseCumulativeProbability(probability)
            )
        }
    }
}
{code}

{code}
import org.hamcrest.Matchers.*
import org.junit.Assert.assertThat
import org.junit.Test

class QuantileFunctionTest {

    /**
     * Verifies that the value plotted for the 32nd percentile is not smaller than the 31st
     * and not larger than the 33rd for the sample data below.
     */
    @Test
    fun shouldPlotPercentile32() {
        val samples = listOf(
            18054, 17548, 17350, 17860, 17827, 17653, 18113, 18405, 17746, 17647,
            18160, 17955, 17705, 17890, 17974, 17857, 13287, 18645, 17775, 17730,
            17996, 18263, 17861, 17161, 17717, 18134, 18669, 18340, 17221, 18292,
            18146, 17520, 18207, 17829, 18206, 13301, 18257, 17626, 18358, 18340,
            18320, 17852, 17804, 17577, 17718, 18099, 13395, 17763, 17911, 17978,
            12935, 17519, 17550, 18728, 18518, 17698, 18739, 18553, 17982, 18113,
            17974, 17961, 17645, 17867, 17890, 17498, 18718, 18191, 18177, 17923,
            18164, 18155, 6212, 5961, 711
        )

        val values = QuantileFunction().plot(samples).map { it.value }

        val lowerNeighbour = values[31]
        val subject = values[32]
        val upperNeighbour = values[33]
        assertThat(subject, greaterThanOrEqualTo(lowerNeighbour))
        assertThat(subject, lessThanOrEqualTo(upperNeighbour))
    }
}
{code}

{code}
/**
 * A single point of a quantile function; see [quantile](https://en.wikipedia.org/wiki/Quantile).
 *
 * @property cumulativeProbability the probability at which the quantile function was evaluated
 * @property value the quantile function's result for [cumulativeProbability]
 */
data class Quantile(val cumulativeProbability: Double, val value: Double)
{code}

{code}
import org.apache.commons.math3.distribution.ConstantRealDistribution
import org.apache.commons.math3.distribution.RealDistribution
import org.apache.commons.math3.random.EmpiricalDistribution
import org.apache.commons.math3.stat.descriptive.SummaryStatistics

/**
 * An [EmpiricalDistribution] whose kernel is always a constant distribution.
 * Serves as a workaround for [MATH-1462](https://issues.apache.org/jira/browse/MATH-1462).
 *
 * @param binCount number of bins, forwarded to the [EmpiricalDistribution] constructor
 * @param data the observations, loaded into the distribution on construction
 */
class RoughEmpiricalDistribution(
    binCount: Int,
    data: DoubleArray
) : EmpiricalDistribution(binCount) {

    init {
        // This class is final and does not override load, so this resolves to the inherited implementation.
        load(data)
    }

    /**
     * Returns a [ConstantRealDistribution] located at the mean of [bStats].
     */
    override fun getKernel(
        bStats: SummaryStatistics
    ): RealDistribution = ConstantRealDistribution(bStats.mean)
}
{code}

> EmpiricalDistribution:inverseCumulativeProbability return Infinity
> ------------------------------------------------------------------
>
>                 Key: MATH-1462
>                 URL: https://issues.apache.org/jira/browse/MATH-1462
>             Project: Commons Math
>          Issue Type: Bug
>    Affects Versions: 3.6.1
>            Reporter: elyes belarbi
>            Priority: Critical
>
> Hi,
> inverseCumulativeProbability(0.5) returns "Infinity", which is absurd, while it returns correct
> values for 0.499999 and 0.511111. Here is the test:
> {code:java}
> double[] data = {6464.0205, 6449.1328, 6489.4569, 6497.5533, 6251.6487, 6252.6513, 6339.7883,
> 6356.2622, 6222.1251, 6157.3813, 6242.4741, 6332.5347, 6468.0633, 6471.2319, 6473.9929,
6589.1322, 
> 6511.2191, 6339.4349, 6307.7735, 6288.0915, 6354.0572, 6385.8283, 6325.3756, 6433.1699,
6433.6507, 
> 6424.6806, 6380.5268, 6407.6705, 6241.2198, 6230.3681, 6367.5943, 6358.4817, 6272.8039,
6269.0211, 
> 6312.9027, 6349.5926, 6404.0775, 6326.986, 6283.8685, 6309.9021, 6336.8554, 6389.1598,
6281.0372, 
> 6304.8852, 6359.2651, 6426.519, 6400.3926, 6440.6798, 6292.5812, 6398.4911, 6307.0002,
6284.2111, 6271.371, 6368.6377, 6323.3372, 6276.2155, 
> 6335.0117, 6319.2466, 6252.9969, 6445.2074, 6461.3944, 6384.1345};
> EmpiricalDistribution ed = new EmpiricalDistribution(data.length);
> ed.load(data);
> double p50 = ed.inverseCumulativeProbability(0.5);
> double p51 = ed.inverseCumulativeProbability(0.51111);
> double p49 = ed.inverseCumulativeProbability(0.49999);
> assertTrue(p51<6350);
> assertTrue(p49<6341);
> assertTrue(p50<7000);
> {code}
>  Any clue to fix this ?
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Mime
View raw message