commons-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Jeff Drew" <jeffrd...@gmail.com>
Subject Commons Math vs. Excel stats?
Date Tue, 07 Nov 2006 22:51:15 GMT
I'm having a weird problem when using the commons math package.  When I run
statistics using the Commons math, then compare the results to Excel, I get
different standard deviation and median, but min, max, and count are the
same.  I'd appreciate any ideas on how Commons Math and Excel differ in
these calculations.

MEDIAN:  Excel:  468,231   CommonsMath:  485,711
STD:        Excel:    11,861   CommonsMath:    10,678

The data set is 18,000 integers so I won't include those.  They are mostly 6
digit numbers.  Here's the code:

import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
import org.apache.commons.math.stat.descriptive.rank.Max;
import org.apache.commons.math.stat.descriptive.rank.Median;
import org.apache.commons.math.stat.descriptive.rank.Min;
import gnu.trove.TDoubleHashSet;

public class ExampleForMailingList {

    StandardDeviation std                    = new StandardDeviation( );

    Min               min                    = new Min( );

    Max               max                    = new Max( );

    Median            medianInstance               = new Median();

    private double    minimum                = 0;

    private double    maximum                = 0;

    private double    standardDev            = 0;

    private double median = 0;

    private boolean   isCalcDone             = false;

    private double   count                  = 0;

    /**
     * <code>data</code> If the length is zero, then only 0 measurements
were added.
     */
    TDoubleHashSet    data                   = new TDoubleHashSet( );

    /**
     * If the <code>measurement</code> is greater than 0, then add it to the
data.
     *
     * @param measurement
     */
    public void addMeasurement( int measurement ) {

            data.add( measurement );

            count++;
    }

    /**
     * Must be called before using the getters.  This method calculates the
statistics.
     */
    public void calculate() {

        try {
            double[] dataArray = data.toArray( );

            minimum = min.evaluate( dataArray );

            maximum = max.evaluate( dataArray );

            standardDev = std.evaluate( dataArray );

            median = medianInstance.evaluate(dataArray);

            isCalcDone = true;

        } catch ( RuntimeException e ) {
            // TODO Auto-generated catch block
            e.printStackTrace( );
        }
    } // calculate

    public double getMinimum() throws CalcNotDoneException {
        return minimum;
    } // get minimum

    public double getMaximum() throws CalcNotDoneException {
           return maximum;
    } // get maximum

    public double getStd() throws CalcNotDoneException {
         return standardDev;
    } // get std

    public double getMedian() throws CalcNotDoneException {
 return median;
    } // get median

    /**
     * Converts a result set into a set of statistics which a table model
consumes. Calculates: <br>
     * 1. min <br>
     * 2. average <br>
     * 3. max <br>
     * 4. median<br>
     * 5. percent threshold violations <br>

     * @param resultSetArg
     *            Results of an order table query
    */
    public void processResults( ResultSet results,String column ) {

        int value = Integer.MAX_VALUE;

           try {
            while ( results.next( ) ) {

                      value = ( int ) results.getLong( column );

                        if ( value > -1 ) {
                            addMeasurement( value );
                        }
                }
        } catch ( SQLException e ) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } // while
 } // processResults

    public static void main( String[] args ) {
        ExampleForMailingList example = new ExampleForMailingList();
        example.processResults(ResultSet set,"columnA");
        example.calculate( );

        System.out.println("std: "+ example.getStd( ));
        System.out.println("std: "+ example.getMedian( ));
    }
}

Thanks!

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message