commons-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Remko Popma" <re...@daiwasmbc.co.jp>
Subject Re: Commons Math vs. Excel stats?
Date Wed, 08 Nov 2006 03:30:13 GMT
Median is defined as the middle value of a sorted list, or if the list 
has an even number of elements, the average of the two middle values of 
the sorted list.

The code below uses this definition to calculate the median. If this 
code gives the same answer as commons math for your large dataset, then 
perhaps Excel is wrong.

double median(double[] values) {
     if (values.length == 0) {
         return Double.NaN; // act same as commons.Math
     }
     double[] copy = new double[values.length];
     System.arraycopy(values, 0, copy, 0, values.length);
     java.util.Arrays.sort(copy);

     int midpos = copy.length / 2;
     if (copy.length % 2 == 1) { // odd number of elements
         return copy[midpos]; // return middle element
     }
     // even number of elements: return avg of two middle elements
     return (copy[midpos - 1] + copy[midpos]) / 2;
}

I tested with some small datasets, and this method gave the same answer 
as commons.Math...

Hope this helps,
--Remko Popma


Jeff Drew wrote:
> I'm having a weird problem when using the commons math package.  When I run
> statistics using the Commons math, then compare the results to Excel, I get
> different standard deviation and median, but min, max, and count are the
> same.  I'd appreciate any ideas on how Commons Math and Excel differ in
> these calculations.
> 
> MEDIAN:  Excel:  468,231   CommonsMath:  485,711
> STD:        Excel:    11,861   CommonsMath:    10,678
> 
> The data set is 18,000 integers so I won't include those.  They are 
> mostly 6
> digit numbers.  Here's the code:
> 
> import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
> import org.apache.commons.math.stat.descriptive.rank.Max;
> import org.apache.commons.math.stat.descriptive.rank.Median;
> import org.apache.commons.math.stat.descriptive.rank.Min;
> import gnu.trove.TDoubleHashSet;
> 
> public class ExampleForMailingList {
> 
>    StandardDeviation std                    = new StandardDeviation( );
> 
>    Min               min                    = new Min( );
> 
>    Max               max                    = new Max( );
> 
>    Median            medianInstance               = new Median();
> 
>    private double    minimum                = 0;
> 
>    private double    maximum                = 0;
> 
>    private double    standardDev            = 0;
> 
>    private double median = 0;
> 
>    private boolean   isCalcDone             = false;
> 
>    private double   count                  = 0;
> 
>    /**
>     * <code>data</code> If the length is zero, then only 0 measurements
> were added.
>     */
>    TDoubleHashSet    data                   = new TDoubleHashSet( );
> 
>    /**
>     * If the <code>measurement</code> is greater than 0, then add it to the
> data.
>     *
>     * @param measurement
>     */
>    public void addMeasurement( int measurement ) {
> 
>            data.add( measurement );
> 
>            count++;
>    }
> 
>    /**
>     * Must be called before using the getters.  This method calculates the
> statistics.
>     */
>    public void calculate() {
> 
>        try {
>            double[] dataArray = data.toArray( );
> 
>            minimum = min.evaluate( dataArray );
> 
>            maximum = max.evaluate( dataArray );
> 
>            standardDev = std.evaluate( dataArray );
> 
>            median = medianInstance.evaluate(dataArray);
> 
>            isCalcDone = true;
> 
>        } catch ( RuntimeException e ) {
>            // TODO Auto-generated catch block
>            e.printStackTrace( );
>        }
>    } // calculate
> 
>    public double getMinimum() throws CalcNotDoneException {
>        return minimum;
>    } // get minimum
> 
>    public double getMaximum() throws CalcNotDoneException {
>           return maximum;
>    } // get maximum
> 
>    public double getStd() throws CalcNotDoneException {
>         return standardDev;
>    } // get std
> 
>    public double getMedian() throws CalcNotDoneException {
> return median;
>    } // get median
> 
>    /**
>     * Converts a result set into a set of statistics which a table model
> consumes. Calculates: <br>
>     * 1. min <br>
>     * 2. average <br>
>     * 3. max <br>
>     * 4. median<br>
>     * 5. percent threshold violations <br>
> 
>     * @param resultSetArg
>     *            Results of an order table query
>    */
>    public void processResults( ResultSet results,String column ) {
> 
>        int value = Integer.MAX_VALUE;
> 
>           try {
>            while ( results.next( ) ) {
> 
>                      value = ( int ) results.getLong( column );
> 
>                        if ( value > -1 ) {
>                            addMeasurement( value );
>                        }
>                }
>        } catch ( SQLException e ) {
>            // TODO Auto-generated catch block
>            e.printStackTrace();
>        } // while
> } // processResults
> 
>    public static void main( String[] args ) {
>        ExampleForMailingList example = new ExampleForMailingList();
>        example.processResults(ResultSet set,"columnA");
>        example.calculate( );
> 
>        System.out.println("std: "+ example.getStd( ));
>        System.out.println("std: "+ example.getMedian( ));
>    }
> }
> 
> Thanks!
> 

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-user-help@jakarta.apache.org


Mime
View raw message