commons-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Rory Winston <rwins...@eircom.net>
Subject Re: Commons Math vs. Excel stats?
Date Fri, 10 Nov 2006 00:30:29 GMT
This example was really bugging me, as I had a hard time believing that 
[math] and Excel could be so far out of whack. I did a simple experiment 
and posted the results on my blog:

http://www.researchkitchen.co.uk/blog/archives/75

Jeff, this suggests that the issue is likely somewhere else in your 
code. If you're still having issues, you may want to post the actual 
data you used, if possible, so somebody else can verify the results 
against it.

Cheers
Rory



Jeff Drew wrote:
> I'm having a weird problem when using the Commons Math package.  When I
> run statistics using Commons Math and then compare the results to Excel,
> I get a different standard deviation and median, but min, max, and count
> are the same.  I'd appreciate any ideas on how Commons Math and Excel
> differ in these calculations.
>
> MEDIAN:  Excel:  468,231   CommonsMath:  485,711
> STD:        Excel:    11,861   CommonsMath:    10,678
>
> The data set is 18,000 integers, so I won't include those.  They are
> mostly 6-digit numbers.  Here's the code:
>
> import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
> import org.apache.commons.math.stat.descriptive.rank.Max;
> import org.apache.commons.math.stat.descriptive.rank.Median;
> import org.apache.commons.math.stat.descriptive.rank.Min;
> import gnu.trove.TDoubleHashSet;
>
> public class ExampleForMailingList {
>
>    StandardDeviation std                    = new StandardDeviation( );
>
>    Min               min                    = new Min( );
>
>    Max               max                    = new Max( );
>
>    Median            medianInstance               = new Median();
>
>    private double    minimum                = 0;
>
>    private double    maximum                = 0;
>
>    private double    standardDev            = 0;
>
>    private double median = 0;
>
>    private boolean   isCalcDone             = false;
>
>    private double   count                  = 0;
>
>    /**
>     * <code>data</code> If the length is zero, then only 0 measurements
> were added.
>     */
>    TDoubleHashSet    data                   = new TDoubleHashSet( );
>
>    /**
>     * If the <code>measurement</code> is greater than 0, then add it 
> to the
> data.
>     *
>     * @param measurement
>     */
>    public void addMeasurement( int measurement ) {
>
>            data.add( measurement );
>
>            count++;
>    }
>
>    /**
>     * Must be called before using the getters.  This method calculates 
> the
> statistics.
>     */
>    public void calculate() {
>
>        try {
>            double[] dataArray = data.toArray( );
>
>            minimum = min.evaluate( dataArray );
>
>            maximum = max.evaluate( dataArray );
>
>            standardDev = std.evaluate( dataArray );
>
>            median = medianInstance.evaluate(dataArray);
>
>            isCalcDone = true;
>
>        } catch ( RuntimeException e ) {
>            // TODO Auto-generated catch block
>            e.printStackTrace( );
>        }
>    } // calculate
>
>    public double getMinimum() throws CalcNotDoneException {
>        return minimum;
>    } // get minimum
>
>    public double getMaximum() throws CalcNotDoneException {
>           return maximum;
>    } // get maximum
>
>    public double getStd() throws CalcNotDoneException {
>         return standardDev;
>    } // get std
>
>    public double getMedian() throws CalcNotDoneException {
> return median;
>    } // get median
>
>    /**
>     * Converts a result set into a set of statistics which a table model
> consumes. Calculates: <br>
>     * 1. min <br>
>     * 2. average <br>
>     * 3. max <br>
>     * 4. median<br>
>     * 5. percent threshold violations <br>
>
>     * @param resultSetArg
>     *            Results of an order table query
>    */
>    public void processResults( ResultSet results,String column ) {
>
>        int value = Integer.MAX_VALUE;
>
>           try {
>            while ( results.next( ) ) {
>
>                      value = ( int ) results.getLong( column );
>
>                        if ( value > -1 ) {
>                            addMeasurement( value );
>                        }
>                }
>        } catch ( SQLException e ) {
>            // TODO Auto-generated catch block
>            e.printStackTrace();
>        } // while
> } // processResults
>
>    public static void main( String[] args ) {
>        ExampleForMailingList example = new ExampleForMailingList();
>        example.processResults(ResultSet set,"columnA");
>        example.calculate( );
>
>        System.out.println("std: "+ example.getStd( ));
>        System.out.println("std: "+ example.getMedian( ));
>    }
> }
>
> Thanks!
>



---------------------------------------------------------------------
To unsubscribe, e-mail: commons-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-user-help@jakarta.apache.org


Mime
View raw message