climate-commits mailing list archives

From prami...@apache.org
Subject svn commit: r1517753 [7/33] - in /incubator/climate/branches/rcmet-2.1.1: ./ src/ src/main/ src/main/python/ src/main/python/bin/ src/main/python/docs/ src/main/python/docs/_static/ src/main/python/docs/_templates/ src/main/python/rcmes/ src/main/pytho...
Date Tue, 27 Aug 2013 05:35:49 GMT
Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/temp.metrics
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/temp.metrics?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/temp.metrics (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/temp.metrics Tue Aug 27 05:35:42 2013
@@ -0,0 +1,1607 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+'''
+Module storing functions to calculate statistical metrics from numpy arrays
+'''
+
+import datetime
+import subprocess
+import sys
+import os
+import numpy as np
+import numpy.ma as ma
+from toolkit import plots, process
+from utils import misc
+from storage import files 
+from pylab import *
+import matplotlib
+import matplotlib.dates
+import matplotlib.pyplot as plt
+from matplotlib.font_manager import FontProperties
+from utils.Taylor import TaylorDiagram
+# 6/10/2012 JK: any Ngl dependence will be removed in later versions
+import Ngl
+
+def calc_ann_mean(t2, time):
+    '''
+    Calculate the annual cycle in terms of monthly means at every grid point,
+    including the single-point case (ndim = 1).
+    '''
+    # note: this routine is currently identical to 'calc_annual_cycle_means';
+    # it still needs to be converted to calculate the annual mean
+    # Extract months from time variable
+    months = np.empty(len(time))
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    if t2.ndim == 3:
+        means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12)+1:
+            means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+    if t2.ndim == 1:
+        means = np.empty((12)) # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12)+1:
+            means[i - 1] = t2[months == i].mean(0)
+    return means
+
+
+def calc_clim_month(t2, time):
+    '''
+    Calculate monthly means at every grid point, including the single-point
+    case (ndim = 1).
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    if t2.ndim == 3:
+        means = ma.empty((12, t2.shape[1], t2.shape[2])) # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+    if t2.ndim == 1:
+        means = np.empty((12)) # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1] = t2[months == i].mean(0)
+    return means
+
+
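+# Hedged usage sketch (illustrative only, not part of the RCMES API): build a
+# 12-month climatology from two years of synthetic monthly data. The names
+# below are hypothetical.
+def _example_calc_clim_month():
+    time = [datetime.datetime(2000 + yr, mo, 15) for yr in range(2) for mo in range(1, 13)]
+    t2 = ma.masked_array(np.random.rand(24, 4, 4))   # (time, lat, lon)
+    return calc_clim_month(t2, time)                 # shape (12, 4, 4)
+
+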
+def calc_clim_year(nYR, nT, ngrdY, ngrdX, t2, time):
+    '''
+    Calculate annual mean timeseries and climatology for both 2-D and point time series.
+    '''
+    # Extract months from time variable
+    yy = np.empty(nT)
+    mm = np.empty(nT)
+    for t in np.arange(nT):
+        yy[t] = time[t].year
+        mm[t] = time[t].month
+    if t2.ndim == 3:
+        tSeries = ma.zeros((nYR, ngrdY, ngrdX))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit)
+            data = t2[wh, :, :]
+            tSeries[i, :, :] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    elif t2.ndim == 1:
+        tSeries = ma.zeros((nYR))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit)
+            data = t2[wh]
+            tSeries[i] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    return tSeries, means
+
+
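+# Hedged sketch (hypothetical names): an annual-mean time series plus its
+# climatology from three years of monthly data on a 4x4 grid.
+def _example_calc_clim_year():
+    time = [datetime.datetime(2000 + yr, mo, 15) for yr in range(3) for mo in range(1, 13)]
+    t2 = ma.masked_array(np.random.rand(36, 4, 4))
+    tSeries, means = calc_clim_year(3, 36, 4, 4, t2, time)
+    return tSeries, means   # (3, 4, 4) yearly means and their (4, 4) average
+
+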
+def calc_clim_season(nYR, nT, mB, mE, ngrdY, ngrdX, t2, time):
+    '''
+    Calculate the seasonal mean timeseries and climatology for both 2-D and
+    point time series. The season is defined by the months mB through mE,
+    with mE >= mB assumed throughout.
+    '''
+    # Extract months from time variable
+    yy = np.empty(nT)
+    mm = np.empty(nT)
+    for t in np.arange(nT):
+        yy[t] = time[t].year
+        mm[t] = time[t].month
+    if t2.ndim == 3:
+        tSeries = ma.zeros((nYR, ngrdY, ngrdX))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
+            data = t2[wh, :, :]
+            tSeries[i, :, :] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    elif t2.ndim == 1:
+        tSeries = ma.zeros((nYR))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
+            data = t2[wh]
+            tSeries[i] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,'  i= ',i,'  yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    return tSeries, means
+
+
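+# Hedged sketch (hypothetical names): a JJA (June-August) seasonal-mean series
+# and climatology; mB = 6 and mE = 8 select the season.
+def _example_calc_clim_season():
+    time = [datetime.datetime(2000 + yr, mo, 15) for yr in range(3) for mo in range(1, 13)]
+    t2 = ma.masked_array(np.random.rand(36, 4, 4))
+    tSeries, means = calc_clim_season(3, 36, 6, 8, 4, 4, t2, time)
+    return tSeries, means   # (3, 4, 4) JJA means and their (4, 4) climatology
+
+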
+def calc_clim_mo(nYR, nT, ngrdY, ngrdX, t2, time):
+    '''
+    Calculate monthly means at every grid point and the annual time series of
+    a single model, including the single-point case (ndim = 1).
+    JK20: modified from 'calc_clim_month' with additional arguments & output,
+    the annual time series of the single model output (mData).
+    6/8/2013 bug fix: mm = months[t] --> mm = months[t] - 1; otherwise an
+    array overflow occurs.
+    '''
+    # Extract months and monthly time series from the time and raw variable, respectively
+    months = np.empty(nT)
+    for t in np.arange(nT):
+        months[t] = time[t].month
+        if t == 0:
+            yy0 = time[t].year
+    # for a 2-D time series data
+    if t2.ndim == 3:
+        mData = ma.empty((nYR, 12, ngrdY, ngrdX))
+        for t in np.arange(nT):
+            yy = time[t].year
+            mm = int(months[t]) - 1
+            yr = yy - yy0
+            mData[yr, mm, :, :] = t2[t, :, :]
+        # Calculate means month by month. means is an empty array to store means
+        means = ma.empty((12, ngrdY, ngrdX))
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+    # for a point time series data
+    if t2.ndim == 1:
+        mData = ma.empty((nYR, 12))
+        for t in np.arange(nT):
+            yy = time[t].year
+            mm = int(months[t]) - 1
+            yr = yy - yy0
+            mData[yr, mm] = t2[t]
+        means = np.empty((12))
+        # Calculate means month by month. means is an empty array to store means
+        for i in np.arange(12) + 1:
+            means[i - 1] = t2[months == i].mean(0)
+    return mData, means
+
+
+def calc_clim_One_month(moID, nYR, nT, t2, time):
+    '''
+    Calculate the monthly mean at every grid point for the single month
+    specified by moID.
+    '''
+    # Extract months and the corresponding time series from time variable
+    months = np.empty(nT)
+    for t in np.arange(nT):
+        months[t] = time[t].month
+    if t2.ndim == 3:
+        mData = ma.empty((nYR, t2.shape[1], t2.shape[2])) # empty array to store time series
+        n = 0
+        for t in np.arange(nT):
+            if months[t] == moID:
+                mData[n, :, :] = t2[t, :, :]
+                n += 1
+        means = ma.empty((t2.shape[1], t2.shape[2])) # empty array to store means
+        # Calculate means for the month specified by moID
+        means[:, :] = t2[months == moID, :, :].mean(0)
+    return mData, means
+
+
+def calc_annual_cycle_means(data, time):
+    '''
+     Calculate monthly means for every grid point
+     
+     Inputs:: 
+        data - masked 3d array of the model data (time, lat, lon)
+        time - an array of python datetime objects
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+    
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    
+    #if there is data varying in t and space
+    if data.ndim == 3:
+        # empty array to store means
+        means = ma.empty((12, data.shape[1], data.shape[2]))
+        
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = data[months == i, :, :].mean(0)
+        
+    #if the data is a timeseries over area-averaged values
+    if data.ndim == 1:
+        # TODO - Investigate using ma instead of np here, per KDW
+        means = np.empty((12)) # empty array to store means
+        
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1] = data[months == i].mean(0)
+    
+    return means
+
+
+def calc_annual_cycle_std(data, time):
+    '''
+     Calculate monthly standard deviations for every grid point
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+    
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    
+    # empty array to store standard deviations
+    stds = np.empty((12, data.shape[1], data.shape[2]))
+    
+    # Calculate standard deviations month by month
+    for i in np.arange(12) + 1:
+        stds[i - 1, :, :] = data[months == i, :, :].std(axis = 0, ddof = 1)
+    
+    return stds
+
+
+def calc_annual_cycle_domain_means(data, time):
+    '''
+     Calculate domain means for each month of the year
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+    
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+       	
+    means = np.empty(12) # empty array to store means
+    
+    # Calculate means month by month
+    for i in np.arange(12) + 1:
+        means[i - 1] = data[months == i, :, :].mean()
+    
+    return means
+
+
+def calc_annual_cycle_domain_std(data, time):
+    '''
+     Calculate domain standard deviations for each month of the year
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+    
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    
+    stds = np.empty(12) # empty array to store standard deviations
+    
+    # Calculate standard deviations month by month
+    for i in np.arange(12) + 1:
+        stds[i - 1] = data[months == i, :, :].std(ddof = 1)
+    
+    return stds
+
+
+def calc_bias_annual(t1, t2, optn):        # Mean Bias
+    '''
+    Calculate the mean difference (bias) between two fields over time for each
+    grid point; if optn == 'abs', the mean absolute difference is returned.
+    Preprocessing of both obs and model data ensures the absence of missing values.
+    '''
+    diff = t1 - t2
+    if(optn == 'abs'): 
+        diff = abs(diff)
+    bias = diff.mean(axis = 0)
+    return bias
+
+
+def calc_bias(t1, t2):
+    '''
+    Calculate mean difference between two fields over time for each grid point
+    
+    Classify missing data resulting from multiple times (using a threshold 
+    data requirement).
+    
+    i.e. if the working time unit is monthly and we are averaging over
+    several months, we need to decide what fraction of missing data we
+    tolerate before classifying a grid point as missing.
+    '''
+    t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
+    t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)
+    
+    diff = t1 - t2
+    bias = diff.mean(axis = 0)
+    
+    # Set mask for bias metric using missing data in obs or model data series
+    #   i.e. if either series exceeds the missing-data threshold used above, 
+    #        then classify the time-average bias as missing at that location. 
+    bias = ma.masked_array(bias.data, np.logical_or(t1Mask, t2Mask))
+    return bias
+
+
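+# Hedged sketch (hypothetical names): the time-mean bias map for two aligned
+# (time, lat, lon) series; relies on process.create_mask_using_threshold from
+# this toolkit, so it only runs inside the RCMES environment.
+def _example_calc_bias():
+    model = ma.masked_array(np.random.rand(24, 4, 4))
+    obs = ma.masked_array(np.random.rand(24, 4, 4))
+    return calc_bias(model, obs)   # 2-D map of the time-mean (model - obs)
+
+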
+def calc_bias_dom(t1, t2):
+    '''
+     Calculate domain mean difference between two fields over time
+    '''
+    diff = t1 - t2
+    bias = diff.mean()
+    return bias
+
+
+def calc_difference(t1, t2):
+    '''
+     Calculate mean difference between two fields over time for each grid point
+    '''
+    print 'Calculating difference'
+    diff = t1 - t2
+    return diff
+
+
+def calc_mae(t1, t2):
+    '''
+    Calculate the mean absolute error between two fields over time for each
+    grid point.
+    
+    Classify missing data resulting from multiple times (using a threshold 
+    data requirement), i.e. if the working time unit is monthly and we are
+    averaging over several months, we need to decide what fraction of missing
+    data we tolerate before classifying a grid point as missing.
+    '''
+    t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
+    t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)
+    
+    diff = t1 - t2
+    adiff = abs(diff)
+    
+    mae = adiff.mean(axis = 0)
+    
+    # Set mask for mae metric using missing data in obs or model data series
+    #   i.e. if either series exceeds the missing-data threshold used above, 
+    #        then classify the time-average mae as missing at that location. 
+    mae = ma.masked_array(mae.data, np.logical_or(t1Mask, t2Mask))
+    return mae
+
+
+def calc_mae_dom(t1, t2):
+    '''
+     Calculate domain mean difference between two fields over time
+    '''
+    diff = t1 - t2
+    adiff = abs(diff)
+    mae = adiff.mean()
+    return mae
+
+
+def calc_rms(t1, t2):
+    '''
+     Calculate the root-mean-square difference between two fields over time
+     for each grid point
+    '''
+    diff = t1 - t2
+    sqdiff = diff ** 2
+    msd = sqdiff.mean(axis = 0)
+    rms = np.sqrt(msd)
+    return rms
+
+
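+# Hedged sketch (hypothetical names): the gridpoint RMS difference over time.
+def _example_calc_rms():
+    model = ma.masked_array(np.random.rand(24, 4, 4))
+    obs = ma.masked_array(np.random.rand(24, 4, 4))
+    return calc_rms(model, obs)   # (4, 4) array of RMS differences
+
+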
+def calc_rms_dom(t1, t2):
+    '''
+     Calculate RMS differences between two fields
+    '''
+    diff = t1 - t2
+    sqdiff = diff ** 2
+    msd = sqdiff.mean()
+    rms = np.sqrt(msd)
+    return rms
+
+
+def calc_temporal_stdv(t1):
+    '''
+    Calculate the temporal standard deviation.
+
+    Input:
+        t1 - data array of any shape
+
+    Output:
+        A 2-D array of temporal standard deviation
+    '''
+    # TODO Make sure the first dimension of t1 is the time axis.
+    stdv = t1.std(axis = 0)
+    return stdv
+
+
+def calc_temporal_anom_cor(mD, oD):
+    '''
+    Calculate the temporal anomaly correlation.
+
+    Assumption(s)::
+        The first dimension of mD and oD is the time axis.
+
+    Input:
+        mD - model data array of any shape
+        oD - observation data array of any shape
+
+    Output:
+        A 2-D array of time series pattern correlation coefficients at each grid point.
+
+    REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+    '''
+    mo = oD.mean(axis = 0)
+    deno1 = ((mD - mo) * (mD - mo)).sum(axis = 0)
+    deno2 = ((oD - mo) * (oD - mo)).sum(axis = 0)
+    patcor = ((mD - mo) * (oD - mo)).sum(axis = 0) / np.sqrt(deno1 * deno2)
+    return patcor
+
+
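+# Hedged sketch (hypothetical names): the temporal anomaly correlation of a
+# model series against observations; both anomalies are taken about the
+# observed time mean, as in the routine above.
+def _example_calc_temporal_anom_cor():
+    obs = ma.masked_array(np.random.rand(24, 4, 4))
+    model = obs + 0.1 * np.random.rand(24, 4, 4)   # a model close to obs
+    return calc_temporal_anom_cor(model, obs)      # (4, 4) map, values near 1
+
+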
+def calc_spatial_anom_cor(mD, oD):
+    '''
+    Calculate anomaly correlation between two 2-D arrays.
+
+    Input:
+        mD - 2-D array of model data
+        oD - 2-D array of observation data
+
+    Output:
+        The anomaly correlation between the two input arrays.
+    '''
+    mo = oD.mean()
+    d1 = ((mD - mo)*(mD - mo)).sum()
+    d2 = ((oD - mo)*(oD - mo)).sum()
+    patcor = ((mD - mo) * (oD - mo)).sum() / np.sqrt(d1 * d2)
+    return patcor
+
+
+def calc_temporal_pat_cor(t1, t2):
+    '''
+     Calculate the Temporal Pattern Correlation
+    
+      Input::
+        t1 - 3d array of model data
+        t2 - 3d array of obs data
+         
+      Output::
+        2d array of time series pattern correlation coefficients at each grid point.
+        **Note:** std_dev is standardized on 1 degree of freedom
+    '''
+    mt1 = t1.mean(axis = 0)
+    mt2 = t2.mean(axis = 0)
+    nt = t1.shape[0]
+    sigma_t1 = t1.std(axis = 0, ddof = 1)
+    sigma_t2 = t2.std(axis = 0, ddof = 1)
+    patcor = ((((t1 - mt1) * (t2 - mt2)).sum(axis = 0)) / (nt)) / (sigma_t1 * sigma_t2)
+    
+    return patcor
+
+
+def calc_spatial_pat_cor(t1, t2):
+    '''
+    Calculate pattern correlation between 2-D arrays.
+    6/10/2013: JK: Enforce both t1 & t2 have the identical mask before calculating std and corr
+
+    Input:
+        t1 - 2-D array of model data
+        t2 - 2-D array of observation data
+
+    Output:
+        Pattern correlation between two input arrays.
+    '''
+    msk1 = ma.getmaskarray(t1)
+    msk2 = ma.getmaskarray(t2)
+    t1 = ma.masked_array(t1.data, np.logical_or(msk1, msk2))
+    t2 = ma.masked_array(t2.data, np.logical_or(msk1, msk2))
+    nGrid = ma.count(t1)   # number of unmasked grid points
+    mt1 = t1.mean()
+    mt2 = t2.mean()
+    st1 = t1.std()
+    st2 = t2.std()
+    patcor = ((t1 - mt1) * (t2 - mt2)).sum() / (nGrid * st1 * st2)
+    return patcor
+
+
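+# Hedged sketch (hypothetical names): the spatial pattern correlation between
+# two 2-D climatology maps; a constant offset leaves the correlation at 1.
+def _example_calc_spatial_pat_cor():
+    oClim = ma.masked_array(np.random.rand(4, 4))
+    mClim = oClim + 0.5                        # uniformly biased model
+    return calc_spatial_pat_cor(mClim, oClim)  # scalar, exactly 1 here
+
+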
+def calc_pat_cor2D(t1, t2, nT):
+    '''
+    Calculate the pattern correlation between 3-D input arrays.
+
+    Input:
+        t1 - 3-D array of model data
+        t2 - 3-D array of observation data
+        nT - expected number of time levels in t1 and t2 (consistency check)
+
+    Output:
+        1-D array (time series) of pattern correlation coefficients.
+    '''
+    nt = t1.shape[0]
+    if(nt != nT):
+        print 'input time levels do not match: Exit', nT, nt
+        return -1
+    # store results in list for convenience (then convert to numpy array at the end)
+    patcor = []
+    for t in xrange(nt):
+        mt1 = t1[t, :, :].mean()
+        mt2 = t2[t, :, :].mean()
+        sigma_t1 = t1[t, :, :].std()
+        sigma_t2 = t2[t, :, :].std()
+        # TODO: make means and standard deviations weighted by grid box area.
+        patcor.append((((((t1[t, :, :] - mt1) * (t2[t, :, :] - mt2)).sum()) / 
+                     (t1.shape[1] * t1.shape[2]) ) / (sigma_t1 * sigma_t2)))
+        print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
+    # TODO: deal with missing data appropriately, i.e. mask out grid points with missing data above tolerance level
+    # convert from list into numpy array
+    patcor = np.array(patcor)
+    print patcor.shape
+    return patcor
+
+def calc_pat_cor(dataset_1, dataset_2):
+    '''
+     Purpose: Calculate the Pattern Correlation: 1-d & 2-d Timeseries, 2-d vs 2-d
+
+     Assumption(s)::  
+        Both dataset_1 and dataset_2 are the same shape.
+        * lat, lon must match up
+        * time steps must align (i.e. months vs. months)
+    
+     Input::
+        dataset_1 - 3d (time, lat, lon) array of data
+        dataset_2 - 3d (time, lat, lon) array of data
+         
+     Output:
+        patcor - a 1d array (time series) of pattern correlation coefficients.
+    
+     **Note:** Standard deviation uses 1 degree of freedom. Debugging print 
+     statements show the difference the n-1 makes. http://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html
+    '''
+
+    # TODO:  Add in try block to ensure the shapes match
+
+    nDim1 = dataset_1.ndim
+    nDim2 = dataset_2.ndim
+    if nDim1 != nDim2:
+        print 'dimension mismatch in calc_pat_cor: exit', nDim1,nDim2
+        sys.exit()
+
+    if nDim1 == 1:
+        mt1 = dataset_1.mean()
+        mt2 = dataset_2.mean()
+        npts = dataset_1.shape[0]
+        sig1 = dataset_1.std()
+        sig2 = dataset_2.std()
+        patcor = ((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / (npts * (sig1 * sig2))
+
+    elif nDim1 == 2:
+        # find mean and std_dev 
+        mt1 = dataset_1.mean()
+        mt2 = dataset_2.mean()
+        ny = dataset_1.shape[0]
+        nx = dataset_1.shape[1]
+        sigma_t1 = dataset_1.std()
+        sigma_t2 = dataset_2.std()
+        patcor=((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / ((ny * nx) * (sigma_t1 * sigma_t2))
+
+    else:
+        nt = dataset_1.shape[0]
+        ny = dataset_1.shape[1]
+        nx = dataset_1.shape[2]
+        # store results in list for convenience (then convert to numpy array)
+        patcor = []
+        for t in xrange(nt):
+            # find mean and std_dev 
+            mt1 = dataset_1[t, :, :].mean()
+            mt2 = dataset_2[t, :, :].mean()
+            sigma_t1 = dataset_1[t, :, :].std(ddof = 1)
+            sigma_t2 = dataset_2[t, :, :].std(ddof=1)
+            # TODO: make means and standard deviations weighted by grid box area.
+            # Equation from Santer_et_al 1995 
+            #     patcor = (1/(N*M_std*O_std))*sum((M_i-M_bar)*(O_i-O_bar))
+            patcor.append((((((dataset_1[t, :, :] - mt1) * (dataset_2[t, :, :] - mt2)).sum()) / (ny * nx)) / (sigma_t1 * sigma_t2)))
+            print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
+            # TODO: deal with missing data appropriately, i.e. mask out grid points
+            # with missing data above tolerance level
+        # convert from list into numpy array
+        patcor = np.array(patcor)
+    
+    print patcor.shape, patcor
+    return patcor
+
+
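+# Hedged sketch (hypothetical names): pattern correlation of two 3-D series,
+# giving one coefficient per time step (the 3-D branch above).
+def _example_calc_pat_cor():
+    obs = ma.masked_array(np.random.rand(6, 4, 4))
+    model = ma.masked_array(np.random.rand(6, 4, 4))
+    return calc_pat_cor(model, obs)   # 1-D array of length 6
+
+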
+def calc_anom_corn(dataset_1, dataset_2, climatology = None):
+    '''
+    Calculate the anomaly correlation.
+
+    Input:
+        dataset_1 - First input dataset
+        dataset_2 - Second input dataset
+        climatology - Optional climatology input array. Assumption is that it is for 
+            the same time period by default.
+
+    Output:
+        The anomaly correlation.
+    '''
+    # TODO: Update docstring with actual useful information
+
+    # store results in list for convenience (then convert to numpy array)
+    anomcor = []    
+    nt = dataset_1.shape[0]
+    #prompt for the third file, i.e. climo file...  
+    #include making sure the lat, lon and times are ok for comparision
+    # find the climo in here and then using for eg, if 100 yrs 
+    # is given for the climo file, but only looking at 10yrs
+    # ask if want to input climo dataset for use....if no, call previous 
+   
+    if climatology is not None:
+        climoFileOption = raw_input('Would you like to use the full observation dataset as \
+                                     the climatology in this calculation? [y/n] \n>')
+        if climoFileOption == 'y':
+            base_dataset = climatology
+        else:
+            base_dataset = dataset_2
+    else:
+        base_dataset = dataset_2
+    for t in xrange(nt):
+        mean_base = base_dataset[t, :, :].mean()
+        anomcor.append((((dataset_1[t, :, :] - mean_base) * (dataset_2[t, :, :] - mean_base)).sum()) / 
+                       np.sqrt(((dataset_1[t, :, :] - mean_base) ** 2).sum() * 
+                               ((dataset_2[t, :, :] - mean_base) ** 2).sum()))
+        print t, mean_base.shape, anomcor[t]
+
+    # TODO: deal with missing data appropriately, i.e. mask out grid points 
+    # with missing data above tolerance level
+    
+    # convert from list into numpy array
+    anomcor = np.array(anomcor)
+    print anomcor.shape, anomcor.ndim, anomcor
+    return anomcor
+
+
+def calc_anom_cor(t1, t2):
+    '''
+     Calculate the Anomaly Correlation (Deprecated)
+    '''
+    
+    nt = t1.shape[0]
+    
+    # store results in list for convenience (then convert to numpy 
+    # array at the end)
+    anomcor = []
+    for t in xrange(nt):
+        
+        mt2 = t2[t, :, :].mean()
+        
+        sigma_t1 = t1[t, :, :].std(ddof = 1)
+        sigma_t2 = t2[t, :, :].std(ddof = 1)
+        
+        # TODO: make means and standard deviations weighted by grid box area.
+        
+        anomcor.append(((((t1[t, :, :] - mt2) * (t2[t, :, :] - mt2)).sum()) / 
+                        (t1.shape[1] * t1.shape[2])) / (sigma_t1 * sigma_t2))
+        
+        print t, mt2.shape, sigma_t1.shape, sigma_t2.shape, anomcor[t]
+        
+        # TODO: deal with missing data appropriately, i.e. mask out grid points with 
+        #       missing data above tolerance level
+        
+    # convert from list into numpy array
+    anomcor = np.array(anomcor)
+    print anomcor.shape, anomcor.ndim, anomcor
+    return anomcor
+
+
+def calc_nash_sutcliff(dataset_1, dataset_2):
+    '''
+    Routine to calculate the Nash-Sutcliffe coefficient of efficiency (E)
+    
+    Assumption(s)::  
+        Both dataset_1 and dataset_2 are the same shape.
+        * lat, lon must match up
+        * time steps must align (i.e. months vs. months)
+    
+    Input::
+        dataset_1 - 3d (time, lat, lon) array of data
+        dataset_2 - 3d (time, lat, lon) array of data
+    
+    Output:
+        nashcor - 1d array aligned along the time dimension of the input
+        datasets. Time series of the Nash-Sutcliffe coefficient of efficiency
+    '''
+
+    nt = dataset_1.shape[0]
+    nashcor = []
+    for t in xrange(nt):
+        mean_dataset_2 = dataset_2[t, :, :].mean()
+        
+        nashcor.append(1 - ((((dataset_2[t, :, :] - dataset_1[t, :, :]) ** 2).sum()) / 
+                            ((dataset_2[t, :, :] - mean_dataset_2) ** 2).sum()))
+        
+        print t, mean_dataset_2.shape, nashcor[t]
+        
+    nashcor = np.array(nashcor)
+    print nashcor.shape, nashcor.ndim, nashcor
+    return nashcor
+
+
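+# Hedged sketch (hypothetical names): Nash-Sutcliffe efficiency per time step;
+# a model identical to the observations scores E = 1 at every step.
+def _example_calc_nash_sutcliff():
+    obs = ma.masked_array(np.random.rand(6, 4, 4))
+    model = obs.copy()
+    return calc_nash_sutcliff(model, obs)   # array of six ones
+
+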
+def calc_pdf(dataset_1, dataset_2):
+    '''
+    Routine to calculate a normalized Probability Distribution Function (PDF)
+    with bins set according to the data range, and from it the PDF similarity
+    (skill) score of Perkins et al. 2007:
+
+        PS = sum(min(Z_O_i, Z_M_i)), where Z is the distribution (histogram)
+        of either dataset. Called in do_rcmes_processing_sub.py.
+
+    Input::
+        dataset_1 - 3d (time, lat, lon) array of model data
+        dataset_2 - 3d (time, lat, lon) array of obs data; time here is the
+        number of time values, e.g. for the period 199001010000 - 199201010000:
+        if annual means (opt 1) was chosen, then dataset_2.shape = (2,lat,lon);
+        if monthly means (opt 2) was chosen, then dataset_2.shape = (24,lat,lon)
+
+    User input: the number of bins to use and the edges (min and max)
+
+    Output::
+        One float which represents the PDF similarity score for the period
+    '''
+    #list to store PDFs of modelData and obsData
+    pdf_mod = []
+    pdf_obs = []
+    # float to store the final PDF similarity score
+    similarity_score = 0.0
+
+    print 'min modelData', dataset_1[:, :, :].min()
+    print 'max modelData', dataset_1[:, :, :].max()
+    print 'min obsData', dataset_2[:, :, :].min()
+    print 'max obsData', dataset_2[:, :, :].max()
+    # find a distribution for the entire dataset
+    # prompt the user to enter the min, max and number of bin values;
+    # the max/min info above is to help guide the user with these choices
+    print '****PDF input values from user required **** \n'
+    nbins = int (raw_input('Please enter the number of bins to use. \n'))
+    minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
+    maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
+    
+    mybins = np.linspace(minEdge, maxEdge, nbins)
+    print 'nbins is', nbins, 'mybins are', mybins
+    
+    
+    # the obsolete 'new' kwarg has been dropped from numpy.histogram; use 'density'
+    # per: http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html
+    pdf_mod, edges = np.histogram(dataset_1, bins = mybins, density = True)
+    print 'dataset_1 distribution and edges', pdf_mod, edges
+    pdf_obs, edges = np.histogram(dataset_2, bins = mybins, density = True)
+    print 'dataset_2 distribution and edges', pdf_obs, edges    
+    
+    # TODO: drop this - considered using the pdf function from the external
+    # statistics package (http://bonsai.hgc.jp/~mdehoon/software/python/Statistics/),
+    # but it is not installed and would have to be tested on Mac.
+
+    #find minimum at each bin between lists 
+    i = 0
+    for model_value in pdf_mod :
+        print 'model_value is', model_value, 'pdf_obs[', i, '] is', pdf_obs[i]
+        if model_value < pdf_obs[i]:
+            similarity_score += model_value
+        else:
+            similarity_score += pdf_obs[i] 
+        i += 1 
+    print 'similarity_score is', similarity_score
+    return similarity_score
+
+
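+# Hedged, non-interactive sketch of the Perkins et al. (2007) score computed
+# by calc_pdf above, with the bin choices passed in instead of prompted for;
+# the names here are hypothetical.
+def _perkins_score(modelData, obsData, nbins, minEdge, maxEdge):
+    mybins = np.linspace(minEdge, maxEdge, nbins)
+    pdf_mod, edges = np.histogram(modelData, bins = mybins, density = True)
+    pdf_obs, edges = np.histogram(obsData, bins = mybins, density = True)
+    return np.minimum(pdf_mod, pdf_obs).sum()
+
+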
+def calc_stdev(t1):
+    ''' 
+    Calculate the standard deviation for a given dataset.
+
+    Input:
+        t1 - Dataset to calculate the standard deviation on.
+
+    Output:
+        Array of the spatial standard deviation at each time step in t1.
+    '''
+    nt = t1.shape[0]
+    sigma_t1 = []
+    for t in xrange(nt):
+        sigma_t1.append(t1[t, :, :].std(ddof = 1))
+    sigma_t1 = np.array(sigma_t1)
+    print sigma_t1, sigma_t1.shape
+    return sigma_t1
+
+# 6/10/2013 JK: plotting routines are added below
+def taylorDiagram(pltDat, pltTit, pltFileName, refName, legendPos):
+    '''
+    Draw a Taylor diagram
+    '''
+    stdref = 1.                     # Standard reference value 
+    rect = 111                      # Subplot setting and location
+    markerSize = 6
+    fig = plt.figure()
+    fig.suptitle(pltTit)            # Plot title
+    dia = TaylorDiagram (stdref, fig = fig, rect = rect, label = refName)
+    for i,(stddev,corrcoef,name) in enumerate(pltDat):
+        dia.add_sample (stddev, corrcoef, marker = '$%d$' % (i+1), ms = markerSize, label=name)
+    # Add polylines to mark a range specified by input data - to be implemented
+    #circular_line = dia.add_stddev_contours(0.959, 1, 0.965)
+    #circular_line = dia.add_stddev_contours(1.1, 1, 0.973)
+    #straight_line = dia.add_contours(0.959, 1, 1.1, 1)
+    #straight_line = dia.add_contours(0.959, 0.965, 1.1, 0.973)
+    # loc: 1='upper right', 2='upper left' or specified in the calling program via "legendPos"
+    l = fig.legend (dia.samplePoints, [p.get_label() for p in dia.samplePoints ], handlelength=0., prop={'size':10}, numpoints=1, loc=legendPos)
+    l.draw_frame(False)
+    plt.savefig(pltFileName)
+    plt.show()
+    pltDat = 0.
+
+
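+# Hedged sketch (hypothetical names and values): pltDat is expected to be a
+# sequence of (standard deviation, correlation, label) triples, one per model,
+# normalized against the reference whose name is passed as refName.
+def _example_taylorDiagram():
+    pltDat = [(1.10, 0.92, 'MODEL-A'), (0.85, 0.88, 'MODEL-B')]
+    taylorDiagram(pltDat, 'Taylor diagram: models vs. OBS', 'taylor_demo.png', 'OBS', 1)
+
+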
+def pltXY(x, y, lineLabel, lineTyp, pltTit, xLabel, yLabel, pltName, xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos):
+    """
+    The default drawing order for axes is patches, lines, text.This order is determined by the zorder attribute. The following defaults are set:
+    Artist                      Z-order
+    Patch / PatchCollection      1
+    Line2D / LineCollection      2
+    Text                         3
+    You can change the order for individual artists by setting the zorder.  Any individual plot() call can set a value
+      for the zorder of that particular item.
+    In the fist subplot below, the lines are drawn above the patch collection from the scatter, which is the default.
+    In the subplot below, the order is reversed.
+    The second figure shows how to control the zorder of individual lines.
+    Arguments
+      x (nX)   : np array: the number of points in the x axis
+      y (nX,nY): np array: the number of y values to be plotted
+      lineLabel: list(nY): labels for individual y data
+      pltTit   : Text    : plot title
+      xLabel   : Text    : x-axis label
+      yLabel   : Text    : y-axis label
+      pltName  : Text    : name of the plot file
+    3/28/2013 Jinwon Kim: Modification of a routine in the matplotlib gallery
+    """
+    lineColors = ['k','b','r','g','c','m','y','0.6','0.8']
+    nX = x.size
+    nY = len(lineLabel)
+    fig = plt.figure()
+    ax = fig.add_subplot(1,1,1)
+    for n in np.arange(nY):
+        plot(x,y[n,:],linewidth=1,color=lineColors[n],linestyle=lineTyp[n],label=lineLabel[n],zorder = 10)
+    xlabel(xLabel,fontsize=10); ylabel(yLabel,fontsize=10)
+    if xmax > xmin:
+        plt.xlim(xmin,xmax)
+        ticks = frange(xmin,xmax,npts=(xmax-xmin)/deltaX+1)
+        ax.xaxis.set_ticks(ticks,minor=False)
+    if ymax > ymin:
+        plt.ylim(ymin,ymax)
+        ticks = frange(ymin,ymax,npts=(ymax-ymin)/deltaY+1)
+        ax.yaxis.set_ticks(ticks,minor=False)
+    ax.set_title(pltTit)
+    ax.xaxis.tick_bottom(); ax.yaxis.tick_left()    # put tick marks only on the left (for y) and bottom (for x) axis
+    if(nY>1):
+        l = legend(prop={'size':10},loc='best')
+        l.set_zorder(20) # put the legend on top
+    plt.savefig(pltName)
+    show()
+    # release work arrays
+    x=0.; y=0.
+
+
+def drawCntrMap(data,lon,lat,titles,ncols,pFname):
+    '''
+    Draw paneled contour maps. This routine is based on PyNGL, i.e., NCAR Graphics.
+    data - a masked numpy array of data to plot (nT,nY,nX)
+    lon - longitude (nY,nX)
+    lat - latitude  (nY,nX)
+    titles - array of titles (nT)
+    ncols - number of columns of the paneled plots; the number of rows is
+            derived internally from nT = nrows * ncols
+    pFname - name of output postscript file
+    '''
+    wks_type = 'ps'
+    if data.ndim == 2:
+        nT = 1; nrows = 1; ncols = 1
+    elif data.ndim == 3:
+        nT=data.shape[0]
+        if nT % ncols == 0:
+            nrows = nT/ncols
+        else:
+            nrows = nT/ncols + 1
+    # set workstation type (X11, ps, png)
+    res = Ngl.Resources()
+    wks = Ngl.open_wks(wks_type,pFname,res)
+    # set plot resource paramters
+    resources = Ngl.Resources()
+    resources.mpLimitMode = "LatLon"    # Limit the map view.
+    resources.mpMinLonF   = lon.min()
+    resources.mpMaxLonF   = lon.max()
+    resources.mpMinLatF   = lat.min()
+    resources.mpMaxLatF   = lat.max()
+    resources.cnFillOn      = True
+    resources.cnLineLabelsOn        = False    # Turn off line labels.
+    resources.cnInfoLabelOn         = False    # Turn off info label.
+    resources.cnLinesOn             = False    # Turn off contour lines (only filled colors)
+    resources.sfXArray              = lon[:,:]
+    resources.sfYArray              = lat[:,:]
+    resources.pmTickMarkDisplayMode = "Never"  # Turn off map tickmarks.
+    resources.mpOutlineBoundarySets = "GeophysicalAndUSStates"
+    resources.mpGeophysicalLineColor = "red"
+    resources.mpUSStateLineColor = "red"
+    resources.mpGeophysicalLineThicknessF = 0.75
+    resources.mpUSStateLineThicknessF = 0.75
+    resources.mpPerimOn             = True     # Turn on the map perimeter
+    resources.mpGridAndLimbOn = True           # Turn on the map grid and limb
+    resources.nglFrame = False    # Don't advance the frame
+    resources.nglDraw = False
+    plot=[]
+    for iT in np.arange(nT):
+        resources.tiMainString = titles[iT]
+        #resources.pmLabelBarDisplayMode = "Never" # Turn off individual label bars
+        resources.pmLabelBarDisplayMode = "Always" # Turn on individual label bars
+        if data.ndim == 3:
+            plot.append(Ngl.contour_map(wks,data[iT,:,:],resources))
+        elif data.ndim == 2:
+            plot.append(Ngl.contour_map(wks,data[:,:],resources))
+    panelres = Ngl.Resources()
+    panelres.nglPanelTop=0.95
+    panelres.nglPanelBottom=0.05
+    panelres.nglPanelLabelBar                 = False   # Turn off the panel labelbar
+    panelres.nglPanelLabelBarLabelFontHeightF = 0.012  # Labelbar font height
+    panelres.nglPanelLabelBarHeightF          = 0.03   # Height of labelbar
+    panelres.nglPanelLabelBarWidthF           = 0.8    # Width of labelbar
+    panelres.lbLabelFont                      = "helvetica-bold" # Labelbar font
+    Ngl.panel(wks,plot[0:nT],[nrows,ncols],panelres)
+    Ngl.destroy(wks)
+    del plot
+    del resources
+    del wks
+    return
+
+
+def metrics_plots(varName, numOBS, numMDL, nT, ngrdY, ngrdX, Times, lons, lats, allData, dataList, workdir, subRegions, timeStep, fileOutputOption):
+    '''
+    Calculate evaluation metrics and generate plots.
+    '''
+    ##################################################################################################################
+    # Routine to compute evaluation metrics and generate plots
+    #    (1)  metric calculation
+    #    (2) plot production
+    #    Input: 
+    #        numOBS           - the number of obs data. either 1 or >2 as obs ensemble is added for multi-obs cases
+    #        numMDL           - the number of mdl data. either 1 or >2 as obs ensemble is added for multi-mdl cases
+    #        nT               - the length of the data in the time dimension
+    #        ngrdY            - the length of the data in Y dimension
+    #        ngrdX            - the length of the data in the X dimension
+    #        Times            - time stamps
+    #        lons,lats        - longitude & latitude values of the data domain (same for model & obs)
+    #        allData          - the sum of the observed and model data (combines obsData & mdlData in the old code)
+    #        dataList         - the list of data names (combines obsList and mdlList in the old code)
+    #        workdir        - string describing the directory path for storing results and plots
+    #        subRegions        - list of SubRegion Objects or False
+    #        fileOutputOption - option to write regridded data in a netCDF file or not
+    #    Output: image files of plots + possibly data
+    #******************************************************
+    # JK2.0: Only the data interpolated temporally and spatially onto the analysis grid 
+    #        are transferred into this routine. The rest of processing (e.g., area-averaging, etc.) 
+    #        are to be performed in this routine. Do not overwrite obsData[numOBs,nt,ngrdY,ngrdX] & 
+    #        mdlData[numMDL,nt,ngrdY,ngrdX]. These are the raw, re-gridded data to be used repeatedly 
+    #        for multiple evaluation steps as desired by an evaluator
+    # JK2.1: The observed and model data are unified in a single variable for ease of processing
+    ##################################################################################################################
+
+    # JK2.1: define the variable to represent the total number of combined (obs + model) datasets
+    numDatasets = numOBS + numMDL
+
+    #####################################################################################################
+    # JK2.0: Compute evaluation metrics and plots to visualize the results
+    #####################################################################################################
+    # (mp.001) Sub-regions for local timeseries analysis
+    #--------------------------------
+    # Enter the location of the subregions via screen input of data; 
+    # modify this to add an option to read in from data file(s)
+    #----------------------------------------------------------------------------------------------------
+    if subRegions:
+        numSubRgn = len(subRegions)
+        subRgnName = [ x.name   for x in subRegions ]
+        subRgnLon0 = [ x.lonMin for x in subRegions ]
+        subRgnLon1 = [ x.lonMax for x in subRegions ]
+        subRgnLat0 = [ x.latMin for x in subRegions ]
+        subRgnLat1 = [ x.latMax for x in subRegions ]
+    else:
+        print ''
+        ans = raw_input('Calculate area-mean timeseries for subregions? y/n: [n] \n')
+        print ''
+        if ans == 'y':
+            ans = raw_input('Input subregion info interactively? y/n: \n> ')
+            if ans == 'y':
+                numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_interactively()
+            else:
+                print 'Read subregion info from a pre-fabricated text file'
+                ans = raw_input('Read from a default file (workdir + "/sub_regions.txt")? y/n: \n> ')
+                if ans == 'y':
+                    subRgnFileName = workdir + "/sub_regions.txt"
+                else:
+                    subRgnFileName = raw_input('Enter the subRgnFileName to read from \n')
+                print 'subRgnFileName ', subRgnFileName
+                numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_from_a_text_file(subRgnFileName)
+            print subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1
+        else:
+            numSubRgn = 0
+    # compute the area-mean timeseries for all subregions if subregion(s) are defined.
+    #   the number of subregions is usually small and memory usage is usually not a concern
+    dataRgn = np.zeros((numDatasets, numSubRgn, nT))
+            
+    if subRegions:        
+        print 'Enter area-averaging: allData.shape ', allData.shape
+        print 'Using Latitude/Longitude Mask for Area Averaging'  
+        for n in np.arange(numSubRgn):
+            # Define mask using regular lat/lon box specified by users ('mask=True' defines the area to be excluded)
+            maskLonMin = subRegions[n].lonMin 
+            if maskLonMin > 180.:
+                maskLonMin = maskLonMin - 360.
+            maskLonMax = subRegions[n].lonMax
+            if maskLonMax > 180.:
+                maskLonMax = maskLonMax - 360.
+            maskLatMin = subRegions[n].latMin
+            maskLatMax = subRegions[n].latMax
+            mask = np.logical_or(np.logical_or(lats <= maskLatMin, lats >= maskLatMax), 
+                                 np.logical_or(lons <= maskLonMin, lons >= maskLonMax))
+            
+            # Calculate area-weighted averages within this region and store in a new list
+            for k in np.arange(numDatasets):           #JK2.1: area-average all data
+                Store = []
+                for t in np.arange(nT):
+                    Store.append(process.calc_area_mean(allData[k, t, :, :], lats, lons, mymask = mask))
+                dataRgn[k, n, :] = ma.array(Store[:])
+            Store = []                               # release the memory allocated by temporary vars
+
+    #-------------------------------------------------------------------------
+    # (mp.002) fileOutputOption: The option to create a binary or netCDF file of processed 
+    #                      (re-gridded and regionally-averaged) data for user-specific processing. 
+    #                      This option is useful for advanced users who need more than
+    #                      the metrics and visualization provided in the basic package.
+    #----------------------------------------------------------------------------------------------------
+    print ''
+    if not fileOutputOption:
+        while fileOutputOption not in ['no', 'nc']:
+            fileOutputOption = raw_input('Option for output files of obs/model data: Enter no/nc \
+                                for none or a netCDF file \n> ').lower()
+    print ''
+
+    # write a netCDF file for post-processing if desired. JK21: binary output option has been completely eliminated
+    if fileOutputOption == 'nc':
+        fileName = '%s/%s_Tseries' % (workdir, varName)
+        tempName = fileName + '.' + 'nc'
+        if os.path.exists(tempName):
+            print "removing %s from the local filesystem, so it can be replaced..." % (tempName,)
+            cmnd = 'rm -f ' + tempName
+            subprocess.call(cmnd, shell=True)
+        files.writeNCfile1(fileName, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions)
+        print 'The regridded obs and model data are written in the netCDF file ', fileName
+
+    #####################################################################################################
+    ###################### Metrics calculation and plotting cycle starts from here ######################
+    #####################################################################################################
+    print ''
+    print 'OBS and MDL data have been prepared for the evaluation step'
+    print ''
+    doMetricsOption = raw_input('Want to calculate metrics and plot them? [y/n]\n> ').lower()
+    if doMetricsOption == 'y':
+        # Assign the variable name and unit to be used in plots
+        print 'The variable to be processed is ',timeStep,' ',varName
+        pltVarName = raw_input('Enter the variable name to appear in the plot\n> ')
+        pltVarUnit = raw_input('Enter the variable unit to appear in the plot\n> ')
+    print ''
+
+    ####################################################
+    # Terminate job if no metrics are to be calculated #
+    ####################################################
+
+    neval = 0
+
+    while doMetricsOption == 'y':
+        neval += 1
+        print ' '
+        print 'neval= ', neval
+        print ' '
+        #--------------------------------
+        # (mp.003) Preparation
+        #----------------------------------------------------------------------------------------------------
+        # Determine info on years (the years in the record [YR] and its number[numYR])
+        yy = ma.zeros(nT, 'i')
+        for n in np.arange(nT):
+            yy[n] = int(Times[n].strftime("%Y"))
+        YR = np.unique(yy)
+        yy = 0
+        nYR = len(YR)
+        print 'nYR, YR = ', nYR, YR
+
+        # Select the eval domain: over the entire domain (spatial distrib) or regional time series
+        anlDomain = 'n'
+        anlRgn = 'n'
+        print ' '
+        analSelect = int(raw_input('Eval over domain (Enter 0) or time series of selected Sub Regions (Enter 1) \n> '))
+        print ' '
+        if analSelect == 0:
+            anlDomain = 'y'
+        elif analSelect == 1:
+            anlRgn = 'y'
+        else:
+            print 'analSelect= ', analSelect, ' is not a valid option: exiting'
+            sys.exit()
+
+        #--------------------------------------------------------------------------------------------------------------------
+        # (mp.004) Select the model and data to be used in the evaluation step
+        # 6/7/2013: JK4 - unified handling of the ref & mdl datasets allows several diff types of evaluation (RCMES v2.1)
+        #                 such as ref vs. one model or ref; ref vs. all models; ref vs. all model + non-ref refs
+        #          refID: the ID of the reference data against which all data are to be evaluated
+        #          mdlID: the list of data ID's to be evaluated. if mdlSelect == -99, the list also includes non-ref obs data
+        #--------------------------------------------------------------------------------------------------------------------
+        refID = int(misc.select_data_combined(numDatasets, Times, dataList, 'ref'))
+        mdlSelect = int(misc.select_data_combined(numDatasets, Times, dataList, 'mdl'))
+        mdlID=[]
+        # Assign the data id to be evaluated. Note that a non-reference obs dataset is treated like a model dataset (mdlSelect == -99)
+        if mdlSelect >= 0:
+            mdlID.append(mdlSelect)
+        elif mdlSelect == -9:
+            for n in np.arange(numMDL):
+                mdlID.append(n+numOBS)
+        elif mdlSelect == -99:
+            for n in np.arange(refID):
+                mdlID.append(n)
+            for n in range(refID + 1, numDatasets):
+                mdlID.append(n)
+        refName = dataList[refID]
+        mdlName = []
+        numMdl = len(mdlID)
+        for n in np.arange(numMdl):
+            mdlName.append(dataList[mdlID[n]][0:6])
+        print 'selected reference and model data for evaluation= ', refName, mdlName
+
+        #--------------------------------
+        # (mp.005) Spatial distribution analysis/Evaluation (anlDomain='y')
+        #          All climatology variables are 2-d arrays (e.g., mClim = ma.zeros((ngrdY,ngrdX))
+        #----------------------------------------------------------------------------------------------------
+        if anlDomain == 'y':
+            # first determine the temporal properties to be evaluated
+            print ''
+            timeOption = misc.select_timOpt()
+            print ''
+            if timeOption == 1:
+                timeScale = 'Annual'
+                # compute the annual-mean time series and climatology. 
+                oTser, oClim = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
+                mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+                for n in np.arange(numMdl):
+                    id = mdlID[n]
+                    mTser[n, :, :, :], mClim[n, :, :] = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
+            elif timeOption == 2:
+                timeScale = 'Seasonal'
+                # select the timeseries and climatology for a season specified by the user
+                mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
+                mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+                print ' '
+                moBgn = int(raw_input('Enter the beginning month for your season. 1-12: \n> '))
+                moEnd = int(raw_input('Enter the ending month for your season. 1-12: \n> '))
+                print ' '
+                if moEnd > moBgn:
+                    nMoPerSeason = moEnd - moBgn + 1
+                    oTser, oClim = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                    for n in np.arange(numMdl):
+                        id = mdlID[n]
+                        mTser[n, :, :, :], mClim[n, :, :] = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
+                elif moEnd == moBgn:
+                    # Eval for a single month. mTser, oTser are the annual time series 
+                    # for the specified month (moEnd), and  mClim, oClim are the corresponding climatology
+                    oTser, oClim = calc_clim_One_month(moEnd, nYR, nT, allData[refID, :, :, :], Times)
+                    for n in np.arange(numMdl):
+                        id = mdlID[n]
+                        mTser[n, :, :, :], mClim[n, :, :] = calc_clim_One_month(moEnd, nYR, nT, allData[id, :, :, :], Times)
+                elif moEnd < moBgn:        # have to lose the ending year. redefine nYR=nYR-1, and drop the YR[nYR]
+                    nMoS1 = 12 - moBgn + 1
+                    nMoS2 = moEnd
+                    nMoPerSeason = nMoS1 + nMoS2
+                    mTser = ma.zeros((numMdl, nYR - 1, ngrdY, ngrdX))
+                    # calculate the seasonal timeseries and climatology for the model data
+                    for n in np.arange(numMdl):
+                        id = mdlID[n]
+                        mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[id, :, :, :], Times)
+                        mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
+                        for i in np.arange(nYR - 1):
+                            mTser[n, i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
+                    mClim = ma.average(mTser, axis = 1)
+                    # repeat for the obs data
+                    mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                    mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                    oTser = ma.zeros((nYR - 1, ngrdY, ngrdX))
+                    for i in np.arange(nYR - 1):
+                        oTser[i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
+                    oClim = ma.zeros((ngrdY, ngrdX))
+                    oClim = ma.average(oTser, axis=0)
+                    nYR = nYR - 1
+                    yy = ma.empty(nYR)
+                    for i in np.arange(nYR):
+                        yy[i] = YR[i]
+                    mTser1 = 0.
+                    mTser2 = 0.
+                    mClim1 = 0.
+                    mClim2 = 0.
+            elif timeOption == 3:
+                timeScale = 'Monthly'
+                # compute the monthly-mean time series and climatology
+                # Note that the shapes of the output vars are: 
+                #   mTser = ma.zeros((nYR,12,ngrdY,ngrdX)) & mClim = ma.zeros((12,ngrdY,ngrdX))
+                # Also same for oTser = ma.zeros((nYR,12,ngrdY,ngrdX)) &,oClim = ma.zeros((12,ngrdY,ngrdX))
+                oTser, oClim = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                mTser = ma.zeros((numMdl, nYR, 12, ngrdY, ngrdX))
+                mClim = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                for n in np.arange(numMdl):
+                    id = mdlID[n]
+                    mTser[n, :, :, :, :], mClim[n, :, :, :] = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
+            else:
+                # undefined process options. exit
+                print 'The desired temporal scale is not available this time. END the job'
+                sys.exit()
+
+            #--------------------------------
+            # (mp.006) Select metric to be calculated
+            # bias, mae, acct, accs, pcct, pccs, rmst, rmss, pdfSkillScore, taylor diagram
+            #----------------------------------------------------------------------------------------------------
+            print ' '
+            metricOption = misc.select_metrics()
+            print ' '
+
+            # metrics calculation: the shape of metricDat varies according to the metric type & timescale options
+
+            # the metrics below yield a 2-d (annual or seasonal) or 3-d (monthly) array for each model
+            if metricOption == 'BIAS':
+                if timeScale == 'Monthly':
+                    oStdv = ma.zeros((12, ngrdY, ngrdX))
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :]  = calc_bias(mTser[n, :, m, :, :], oTser[:, m, :, :])
+                            if n == 0:
+                                oStdv[m, :, :] = calc_temporal_stdv(oTser[:, m, :, :])
+                else:
+                    oStdv = calc_temporal_stdv(oTser)
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :]  = calc_bias(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'MAE':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :]  = calc_mae(mTser[n, :, m, :, :], oTser[m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :]  = calc_mae(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'ACCt':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :]  = calc_temporal_anom_cor(mTser[n, :, m, :, :], oTser[m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :]  = calc_temporal_anom_cor(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'PCCt':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :]  = calc_temporal_pat_cor(mTser[n, :, m, :, :], oTser[m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :]  = calc_temporal_pat_cor(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'RMSt':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :]  = calc_rms(mTser[n, :, m, :, :], oTser[m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :]  = calc_rms(mTser[n, :, :, :], oTser)
+
+            # the metrics below yield a scalar value per model (12 values per model for the monthly time scale)
+            elif metricOption == 'ACCs':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m]  = calc_spatial_anom_cor(mClim[n, m, :, :], oClim[m, :, :])
+                else:
+                    metricDat = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        metricDat[n]  = calc_spatial_anom_cor(mClim[n, :, :], oClim)
+
+            elif metricOption == 'PCCs':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m]  = calc_spatial_pat_cor(mClim[n, m, :, :], oClim[m, :, :])
+                else:
+                    metricDat = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        metricDat[n]  = calc_spatial_pat_cor(mClim[n, :, :], oClim)
+
+            elif metricOption == 'RMSs':
+                if timeScale == 'Monthly':
+                    metricDat = ma.zeros((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m]  = rms_dom(mClim[n, m, :, :], oClim[m, :, :])
+                else:
+                    metricDat = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        metricDat[n]  = rms_dom(mClim[n, :, :], oClim)
+
+            # metrics to plot taylor diagram
+            elif metricOption == 'Taylor_space':
+                if timeScale == 'Monthly':
+                    oStdv = ma.zeros(12)
+                    mStdv = ma.zeros((numMdl, 12))
+                    mCorr = ma.zeros((numMdl, 12))
+                    mTemp = ma.zeros((ngrdY, ngrdX))
+                    for m in np.arange(12):
+                        oStdv[m] = oClim[m, :, :].std() 
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            mTemp = mClim[n, m, :, :]
+                            mStdv[n, m] = mTemp.std()
+                            mCorr[n, m] = calc_spatial_pat_cor(mTemp, oClim[m, :, :])
+                else:
+                    oStdv = oClim.std()
+                    mStdv = ma.zeros(numMdl)
+                    mCorr = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        mStdv[n] = mClim[n, :, :].std()
+                        mCorr[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim)
+                mStdv = mStdv / oStdv    # normalize the model stdv by the obs stdv for the Taylor diagram
+
+            #--------------------------------
+            # (mp.007) Plot the metrics. First, enter plot info
+            #----------------------------------------------------------------------------------------------------
+
+            # Taylor diagram
+            if metricOption == 'Taylor_space':
+                taylorDat = []
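+                # build one [normalized stdv, pattern correlation, legend label] entry per model
+                # (assumes the annual/seasonal case, where mStdv & mCorr hold one scalar per model)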
+                for n in np.arange(numMdl):
+                    taylorDat.append([mStdv[n], mCorr[n], mdlName[n]])
+                pltTit = 'Spatial variability evaluation'
+                pltFileName = workdir + '/taylor_space_' + timeScale + '-' + varName
+                legendPos = 'upper right'
+                status = taylorDiagram(taylorDat, pltTit, pltFileName, refName, legendPos)
+
+            # 2-d contour plots
+            elif metricDat.ndim == 3:
+                # obs climatology
+                plotDat = np.zeros((ngrdY, ngrdX))
+                print ''
+                makePlot = raw_input('Plot observed climatology? [y/n]\n> ').lower()
+                if makePlot == 'y':
+                    plotDat = oClim
+                    plotFileName = workdir + '/' + timeScale + '_' + varName + '_OBS'
+                    if os.path.exists(plotFileName):
+                        cmnd = 'rm -f ' + plotFileName
+                        subprocess.call(cmnd, shell=True)
+                    plotTitle = []
+                    plotTitle.append('Observed ' + pltVarName + ' (' + pltVarUnit + '): Climatology')
+                    ncols = 1
+                    status = drawCntrMap(plotDat, lons, lats, plotTitle, ncols, plotFileName)
+                # Plot metrics (2-d contour plots)
+                print ''
+                print 'Plot', metricOption, 'in', timeScale, varName
+                plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption
+                if os.path.exists(plotFileName):
+                    cmnd = 'rm -f ' + plotFileName
+                    subprocess.call(cmnd, shell=True)
+                pltTit = []
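+                # use three panel columns when several models are plotted; a single model gets one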
+                if metricDat.shape[0] > 1:
+                    ncols = 3
+                    for n in np.arange(numMdl):
+                        pltTit.append(mdlName[n])
+                elif metricDat.shape[0] == 1:
+                    ncols = 1
+                    pltTit.append(mdlName[0])
+                status = drawCntrMap(metricDat, lons, lats, pltTit, ncols, plotFileName)
+
+                # for bias, also plot normalized values: first, normalized by the obs mean
+                if metricOption == 'BIAS':
+                    print ''
+                    makePlot = raw_input('Plot bias in terms of % of OBS mean? [y/n]\n> ').lower()
+                    print ''
+                    if makePlot == 'y':
+                        plotDat = 100. * metricDat / oClim
+                        plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_MEAN'
+                        if os.path.exists(plotFileName):
+                            cmnd = 'rm -f ' + plotFileName
+                            subprocess.call(cmnd, shell=True)
+                        pltTit = []
+                        if metricDat.shape[0] > 1:
+                            ncols = 3
+                            for n in np.arange(numMdl):
+                                pltTit.append(mdlName[n])
+                        elif metricDat.shape[0] == 1:
+                            ncols = 1
+                            pltTit.append(mdlName[0])
+                        status = drawCntrMap(plotDat, lons, lats, pltTit, ncols, plotFileName)
+                        plotDat = 0.
+
+                    # next, normalized by the interannual standard deviation
+                    makePlot = raw_input('Plot bias in terms of % of interannual sigma? [y/n]\n> ').lower()
+                    if makePlot == 'y':
+                        plotDat = 100. * metricDat / oStdv
+                        plotFileName = workdir + '/' + timeScale + '_' + varName + '_' + metricOption + '_SIG'
+                        if os.path.exists(plotFileName):
+                            cmnd = 'rm -f ' + plotFileName
+                            subprocess.call(cmnd, shell=True)
+                        pltTit = []
+                        if metricDat.shape[0] > 1:
+                            ncols = 3
+                            for n in np.arange(numMdl):
+                                pltTit.append(mdlName[n])
+                        elif metricDat.shape[0] == 1:
+                            ncols = 1
+                            pltTit.append(mdlName[0])
+                        status = drawCntrMap(plotDat, lons, lats, pltTit, ncols, plotFileName)
+                        plotDat = 0.
+
+            # Repeat for another metric. Also release memory used by temporary variables
+            metricDat = 0.
+            oTser = 0.
+            oClim = 0.
+            mTser = 0.
+            mClim = 0.
+            print ''
+            print 'Evaluation completed'
+            print ''
+            doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower()
+            print ''
+
+        # metrics and plots for regional time series
+        elif anlRgn == 'y' and subRegions == False:
+            print 'No subregions have been defined. Regional time series plots cannot be created.'
+
+        # metrics and plots for regional time series
+        elif anlRgn == 'y':
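+            # dataRgn holds the area-averaged time series for every dataset & subregion: (dataset, subregion, time)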
+            oData = ma.zeros((numSubRgn, nT))
+            mData = ma.zeros((numMdl, numSubRgn, nT))
+            oData[:, :] = dataRgn[refID, :, :]
+            for n in np.arange(numMdl):
+                mData[n, :, :] = dataRgn[mdlID[n], :, :]
+            # select the region(s) for evaluation. model and obs have been selected before entering this if block
+            print 'There are %s subregions. Select the subregion(s) for evaluation' % numSubRgn
+            rgnSelect = misc.selectSubRegion(subRegions)
+            print 'Selected region(s) for evaluation: ', rgnSelect
+
+            # compute metrics and plot
+
+            # (rgn01) evaluate the annual cycle
+            ans = raw_input('Do you want to evaluate the annual cycle? [y/n]\n> ').lower()
+            if ans == 'y':
+                if rgnSelect >= 0:              # analysis for a single region
+                    obsAnnCyc = calc_annual_cycle_means(oData[rgnSelect, :], Times)
+                    mdlAnnCyc = ma.empty((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        mdlAnnCyc[n, :] = calc_annual_cycle_means(mData[n, rgnSelect, :], Times)
+                else:                           # analysis for all subregions
+                    obsAnnCyc = ma.empty((numSubRgn, 12))
+                    mdlAnnCyc = ma.empty((numMdl, numSubRgn, 12))
+                    for r in np.arange(numSubRgn):
+                        obsAnnCyc[r, :] = calc_annual_cycle_means(oData[r, :], Times)
+                    for n in np.arange(numMdl):
+                        for r in np.arange(numSubRgn):
+                            mdlAnnCyc[n, r, :] = calc_annual_cycle_means(mData[n, r, :], Times)
+                    # diagnostic printout (meaningful only in the all-subregions case)
+                    print 'obsAnnCyc= ', obsAnnCyc[1, :]
+                    for n in np.arange(numMdl):
+                        print 'mdlAnnCyc= ', mdlAnnCyc[n, 1, :]
+
+            #--------------------------------
+            # (mp.008) Select performance metric
+            #----------------------------------------------------------------------------------------------------
+            #metricOption = misc.select_metrics()
+            # Temporarily, compute the RMSE and pattern correlation for the simulated 
+            # and observed annual cycle based on monthly means
+            # TODO tempRMS and tempCOR are not used in the code and throwing errors.
+#            tempRMS = calc_rms(mdlAnnCyc, obsAnnCyc)
+#            tempCOR = calc_temporal_pat_cor(mdlAnnCyc, obsAnnCyc)
+
+            #--------------------------------
+            # (mp.009) Plot results
+            #----------------------------------------------------------------------------------------------------
+            # Data-specific plot options: i.e. adjust model data units & set plot color bars
+            colorbar = 'rainbow'
+            if varName == 'pr':                      # set color bar for prcp
+                colorbar = 'precip2_17lev'
+
+            # 1-d data, e.g. Time series plots
+            plotFileName = 'anncyc_' + varName + '_' + subRegions[rgnSelect].name
+            if os.path.exists(plotFileName):
+                cmnd = 'rm -f ' + plotFileName
+                subprocess.call(cmnd, shell=True)
+            year_labels = False         # for annual cycle plots
+            mytitle = 'Annual Cycle of ' + varName + ' at Sub-Region ' + subRegions[rgnSelect].name
+            # Create a list of datetimes to represent the annual cycle, one per month.
+            times = []
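+            # the year 2000 below is an arbitrary placeholder; only the month matters for the annual cycle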
+            for m in xrange(12):
+                times.append(datetime.datetime(2000, m + 1, 1, 0, 0, 0, 0))
+            #for i in np.arange(12):
+            #  times.append(i+1)
+            _ = plots.draw_time_series_plot(mdlAnnCyc, times, plotFileName, workdir, 
+                                                   data2 = obsAnnCyc, mytitle = mytitle, ytitle = 'Y', 
+                                                   xtitle = 'MONTH', year_labels = year_labels)
+
+            # Repeat for another metric
+            doMetricsOption = raw_input('Do you want to perform another evaluation? [y/n]\n> ').lower()
+
+    # Processing complete if a user enters 'n' for 'doMetricsOption'
+    print 'RCMES processing completed.'
+
+

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/temp.metrics
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/__init__.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/__init__.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/__init__.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/__init__.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,2 @@
+"""The toolkit Package is a collection of modules that provide a set of tools
+that can be used to process, analyze and plot the analysis results."""
\ No newline at end of file

Propchange: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/__init__.py
------------------------------------------------------------------------------
    svn:executable = *


