I remember running into similar discrepancies a while back (between custom
Java and Excel rather than Commons Math). I seem to recall Excel having some
overflow oddities/bugs that led to incorrect results.
It was a couple of years ago at a previous company, so my memory isn't
too hot :)
Hen
On 11/7/06, Jeff Drew <jeffrdrew@gmail.com> wrote:
> I'm having a weird problem when using the commons math package. When I run
> statistics using the Commons math, then compare the results to Excel, I get
> different standard deviation and median, but min, max, and count are the
> same. I'd appreciate any ideas on how Commons Math and Excel differ in
> these calculations.
>
> MEDIAN: Excel: 468,231 CommonsMath: 485,711
> STD: Excel: 11,861 CommonsMath: 10,678
>
> The data set is 18,000 integers so I won't include those. They are mostly 6
> digit numbers. Here's the code:
>
> import org.apache.commons.math.stat.descriptive.moment.StandardDeviation;
> import org.apache.commons.math.stat.descriptive.rank.Max;
> import org.apache.commons.math.stat.descriptive.rank.Median;
> import org.apache.commons.math.stat.descriptive.rank.Min;
> import gnu.trove.TDoubleHashSet;
>
> public class ExampleForMailingList {
> /*
> * Example posted to the mailing list: collects measurements through
> * addMeasurement/processResults and, in calculate(), evaluates minimum,
> * maximum, standard deviation and median over them with Commons Math.
> */
> StandardDeviation std = new StandardDeviation( );
>
> Min min = new Min( );
>
> Max max = new Max( );
>
> Median medianInstance = new Median();
>
> private double minimum = 0;
>
> private double maximum = 0;
>
> private double standardDev = 0;
>
> private double median = 0;
>
> private boolean isCalcDone = false; // set to true at the end of a successful calculate(); never read in this class
>
> private double count = 0; // incremented on every addMeasurement() call, repeated values included
>
> /**
> * <code>data</code> If the length is zero, then only 0 measurements
> * were added.
> *
> * NOTE(review): this is a Trove hash set of doubles. Presumably, like any
> * set, it keeps each distinct value only once - TODO confirm. If so, the
> * array handed to the statistics in calculate() has no repeated values, so
> * standard deviation and median are computed over distinct values only,
> * while min and max are unaffected and <code>count</code> still counts
> * every call to addMeasurement.
> */
> TDoubleHashSet data = new TDoubleHashSet( );
>
> /**
> * Adds the <code>measurement</code> to the data and increments the count.
> * No range check is done here; processResults only passes in values
> * greater than -1.
> *
> * @param measurement value to record
> */
> public void addMeasurement( int measurement ) {
>
> data.add( measurement );
>
> count++;
> }
>
> /**
> * Must be called before using the getters. This method calculates the
> * statistics: minimum, maximum, standard deviation and median of the
> * values currently held in <code>data</code>, in that order.
> * A RuntimeException raised along the way is printed and not rethrown;
> * in that case the remaining statistics keep their previous values and
> * isCalcDone is not set by this call.
> */
> public void calculate() {
>
> try {
> double[] dataArray = data.toArray( );
>
> minimum = min.evaluate( dataArray );
>
> maximum = max.evaluate( dataArray );
>
> standardDev = std.evaluate( dataArray );
>
> median = medianInstance.evaluate(dataArray);
>
> isCalcDone = true;
>
> } catch ( RuntimeException e ) {
> // TODO Auto-generated catch block
> e.printStackTrace( );
> }
> } // calculate
> /*
> * The four getters below return whatever calculate() last stored, or the
> * initial 0 if nothing has been stored yet. They declare
> * CalcNotDoneException, but none of them checks isCalcDone or throws.
> */
> public double getMinimum() throws CalcNotDoneException {
> return minimum;
> } // get minimum
>
> public double getMaximum() throws CalcNotDoneException {
> return maximum;
> } // get maximum
>
> public double getStd() throws CalcNotDoneException {
> return standardDev;
> } // get std
>
> public double getMedian() throws CalcNotDoneException {
> return median;
> } // get median
>
> /**
> * Reads <code>column</code> from every row of <code>results</code> as a
> * long, casts it to int, and passes each value greater than -1 to
> * addMeasurement. No statistics are computed here; calculate() does that.
> * A SQLException is printed and not rethrown, which ends the loop early.
> *
> * NOTE(review): the earlier version of this comment said the method
> * calculates min, average, max, median and percent threshold violations
> * for a table model. Average and threshold violations do not appear
> * anywhere in this class.
> *
> * @param results
> * Results of an order table query
> * @param column
> * name of the column to read from each row
> */
> public void processResults( ResultSet results,String column ) {
>
> int value = Integer.MAX_VALUE; // overwritten on every row before it is used
>
> try {
> while ( results.next( ) ) {
>
> value = ( int ) results.getLong( column ); // narrowing cast: a long outside the int range would be altered
>
> if ( value > -1 ) {
> addMeasurement( value );
> }
> } // while
> } catch ( SQLException e ) {
> // TODO Auto-generated catch block
> e.printStackTrace();
> } // try/catch
> } // processResults
>
> public static void main( String[] args ) {
> ExampleForMailingList example = new ExampleForMailingList();
> example.processResults(ResultSet set,"columnA"); // placeholder, not valid Java as written: a real ResultSet has to be passed
> example.calculate( );
>
> System.out.println("std: "+ example.getStd( ));
> System.out.println("std: "+ example.getMedian( )); // prints the median, although the label says "std: "
> }
> }
>
> Thanks!
>
>
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-user-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-user-help@jakarta.apache.org
|