Subject: svn commit: r1517753 [9/33] - in /incubator/climate/branches/rcmet-2.1.1: ./ src/ src/main/ src/main/python/ src/main/python/bin/ src/main/python/docs/ src/main/python/docs/_static/ src/main/python/docs/_templates/ src/main/python/rcmes/ src/main/pytho...
Date: Tue, 27 Aug 2013 05:35:49 -0000
To: commits@climate.incubator.apache.org
From: pramirez@apache.org
Reply-To: dev@climate.incubator.apache.org
Message-Id: <20130827053601.532562388BCD@eris.apache.org>

Added: incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py?rev=1517753&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py (added)
+++ incubator/climate/branches/rcmet-2.1.1/src/main/python/rcmes/toolkit/metrics.py Tue Aug 27 05:35:42 2013
@@ -0,0 +1,2455 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+'''
+Module storing functions to calculate statistical metrics from numpy arrays
+'''
+
+import datetime
+import subprocess
+import sys
+import os
+import numpy as np
+import numpy.ma as ma
+from math import floor, log
+from toolkit import process
+from utils import misc
+from storage import files
+from pylab import *
+import scipy.stats.mstats as mstats
+import matplotlib as mpl
+import matplotlib.dates
+import matplotlib.pyplot as plt
+from mpl_toolkits.axes_grid1 import ImageGrid
+from matplotlib.font_manager import FontProperties
+from mpl_toolkits.basemap import Basemap
+from utils.Taylor import TaylorDiagram
+# 6/10/2012 JK: any Ngl dependence will be removed in later versions
+#import Ngl
+
+def calc_ann_mean(t2, time):
+    '''
+    Calculate annual cycle in terms of monthly means at every grid point.
+    '''
+    # Calculate annual cycle in terms of monthly means at every grid point including single point case (ndim=1)
+    # note: this routine is identical to 'calc_annual_cycle_means': must be converted to calculate the annual mean
+    # Extract months from time variable
+    months = np.empty(len(time))
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    if t2.ndim == 3:
+        means = ma.empty((12, t2.shape[1], t2.shape[2]))  # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+    if t2.ndim == 1:
+        means = np.empty((12))  # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1] = t2[months == i].mean(0)
+    return means
+
+
+def calc_clim_month(t2, time):
+    '''
+    Calculate monthly means at every grid point.
+    '''
+    # Calculate monthly means at every grid point including single point case (ndim=1)
+    # Extract months from time variable
+    months = np.empty(len(time))
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+    if t2.ndim == 3:
+        means = ma.empty((12, t2.shape[1], t2.shape[2]))  # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+    if t2.ndim == 1:
+        means = np.empty((12))  # empty array to store means
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1] = t2[months == i].mean(0)
+    return means
+
+
+def calc_clim_year(nYR, nT, ngrdY, ngrdX, t2, time):
+    '''
+    Calculate annual mean timeseries and climatology for both 2-D and point time series.
+    '''
+    # Extract years and months from time variable
+    yy = np.empty(nT)
+    mm = np.empty(nT)
+    for t in np.arange(nT):
+        yy[t] = time[t].year
+        mm[t] = time[t].month
+    if t2.ndim == 3:
+        tSeries = ma.zeros((nYR, ngrdY, ngrdX))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit)
+            data = t2[wh, :, :]
+            tSeries[i, :, :] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    elif t2.ndim == 1:
+        tSeries = ma.zeros((nYR))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit)
+            data = t2[wh]
+            tSeries[i] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    return tSeries, means
+
+
+def calc_clim_season(nYR, nT, mB, mE, ngrdY, ngrdX, t2, time):
+    '''
+    Calculate seasonal mean timeseries and climatology for both 2-D and point time series.
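+
+    Example (illustrative sketch; the numbers below are hypothetical):
+        For 3 years of monthly 2-D data on a 40 x 50 grid (nYR = 3, nT = 36)
+        and a June-August season (mB = 6, mE = 8),
+            tSeries, means = calc_clim_season(3, 36, 6, 8, 40, 50, t2, time)
+        returns tSeries with shape (3, 40, 50) (one seasonal mean per year)
+        and means with shape (40, 50) (the seasonal climatology).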
+    '''
+    #-------------------------------------------------------------------------------------
+    # Calculate seasonal mean timeseries and climatology for both 2-d and point time series
+    # The season to be calculated is defined by mB and mE; mE >= mB always
+    #-------------------------------------------------------------------------------------
+    # Extract years and months from time variable
+    yy = np.empty(nT)
+    mm = np.empty(nT)
+    for t in np.arange(nT):
+        yy[t] = time[t].year
+        mm[t] = time[t].month
+    if t2.ndim == 3:
+        tSeries = ma.zeros((nYR, ngrdY, ngrdX))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
+            data = t2[wh, :, :]
+            tSeries[i, :, :] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    elif t2.ndim == 1:
+        tSeries = ma.zeros((nYR))
+        i = 0
+        for myunit in np.unique(yy):
+            wh = (yy == myunit) & (mm >= mB) & (mm <= mE)
+            data = t2[wh]
+            tSeries[i] = ma.average(data, axis = 0)
+            #print 'data.shape= ',data.shape,' i= ',i,' yy= ',yy
+            i += 1
+        means = ma.average(tSeries, axis = 0)
+    return tSeries, means
+
+
+def calc_clim_mo(nYR, nT, ngrdY, ngrdX, t2, time):
+    '''
+    Calculate monthly means at every grid point and the annual time series of a single model including single point case (ndim=1).
+    JK20: This is modified from 'calc_clim_month' with additional arguments & output, the annual time series of single model output (mData)
+    6/8/2013: bug fix: mm = months[t] --> mm = months[t] - 1, otherwise array overflow occurs
+    '''
+    # Extract months and monthly time series from the time and raw variable, respectively
+    months = np.empty(nT)
+    for t in np.arange(nT):
+        months[t] = time[t].month
+        if t == 0:
+            yy0 = time[t].year
+    # for a 2-D time series data
+    if t2.ndim == 3:
+        mData = ma.empty((nYR, 12, ngrdY, ngrdX))
+        for t in np.arange(nT):
+            yy = time[t].year
+            mm = months[t] - 1
+            yr = yy - yy0
+            mData[yr, mm, :, :] = t2[t, :, :]
+        # Calculate means month by month. means is an empty array to store means
+        means = ma.empty((12, ngrdY, ngrdX))
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = t2[months == i, :, :].mean(0)
+    # for a point time series data
+    if t2.ndim == 1:
+        mData = ma.empty((nYR, 12))
+        for t in np.arange(nT):
+            yy = time[t].year
+            mm = months[t] - 1    # same - 1 offset as the 2-D case (see bug fix note above)
+            yr = yy - yy0
+            mData[yr, mm] = t2[t]
+        means = np.empty((12))
+        # Calculate means month by month. means is an empty array to store means
+        for i in np.arange(12) + 1:
+            means[i - 1] = t2[months == i].mean(0)
+    return mData, means
+
+
+def calc_clim_One_month(moID, nYR, nT, t2, time):
+    '''
+    Calculate the monthly mean at every grid point for a specified month.
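+
+    Example (illustrative sketch; the numbers below are hypothetical):
+        For 10 years of monthly 2-D data (nYR = 10, nT = 120), the January
+        time series and January climatology are obtained with
+            mData, means = calc_clim_One_month(1, 10, 120, t2, time)
+        where mData has shape (10, nY, nX) and means has shape (nY, nX).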
+    '''
+    #-------------------------------------------------------------------------------------
+    # Calculate monthly means at every grid point for a specified month
+    #-------------------------------------------------------------------------------------
+    # Extract months and the corresponding time series from time variable
+    months = np.empty(nT)
+    for t in np.arange(nT):
+        months[t] = time[t].month
+    if t2.ndim == 3:
+        mData = ma.empty((nYR, t2.shape[1], t2.shape[2]))  # empty array to store time series
+        n = 0
+        # collect the time series for the specified month
+        for t in np.arange(nT):
+            if months[t] == moID:
+                mData[n, :, :] = t2[t, :, :]
+                n += 1
+        means = ma.empty((t2.shape[1], t2.shape[2]))  # empty array to store means
+        # Calculate means for the month specified by moID
+        means[:, :] = t2[months == moID, :, :].mean(0)
+    return mData, means
+
+
+def calc_annual_cycle_means(data, time):
+    '''
+    Calculate monthly means for every grid point
+
+    Inputs::
+        data - masked 3d array of the model data (time, lon, lat)
+        time - an array of python datetime objects
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+
+    # if there is data varying in t and space
+    if data.ndim == 3:
+        # empty array to store means
+        means = ma.empty((12, data.shape[1], data.shape[2]))
+
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1, :, :] = data[months == i, :, :].mean(0)
+
+    # if the data is a timeseries over area-averaged values
+    if data.ndim == 1:
+        # TODO - Investigate using ma per KDW
+        means = np.empty((12))  # empty array to store means
+
+        # Calculate means month by month
+        for i in np.arange(12) + 1:
+            means[i - 1] = data[months == i].mean(0)
+
+    return means
+
+
+def calc_annual_cycle_std(data, time):
+    '''
+    Calculate monthly standard deviations for every grid point
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+
+    # empty array to store standard deviations
+    stds = np.empty((12, data.shape[1], data.shape[2]))
+
+    # Calculate standard deviations month by month
+    for i in np.arange(12) + 1:
+        stds[i - 1, :, :] = data[months == i, :, :].std(axis = 0, ddof = 1)
+
+    return stds
+
+
+def calc_annual_cycle_domain_means(data, time):
+    '''
+    Calculate domain means for each month of the year
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+
+    means = np.empty(12)  # empty array to store means
+
+    # Calculate means month by month
+    for i in np.arange(12) + 1:
+        means[i - 1] = data[months == i, :, :].mean()
+
+    return means
+
+
+def calc_annual_cycle_domain_std(data, time):
+    '''
+    Calculate domain standard deviations for each month of the year
+    '''
+    # Extract months from time variable
+    months = np.empty(len(time))
+
+    for t in np.arange(len(time)):
+        months[t] = time[t].month
+
+    stds = np.empty(12)  # empty array to store standard deviations
+
+    # Calculate standard deviations month by month
+    for i in np.arange(12) + 1:
+        stds[i - 1] = data[months == i, :, :].std(ddof = 1)
+
+    return stds
+
+
+def calc_bias_annual(t1, t2, optn):  # Mean Bias
+    '''
+    Calculate the mean difference between two fields over time for each grid point.
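+
+    Example (illustrative sketch; mdl and obs are hypothetical arrays):
+        bias = calc_bias_annual(mdl, obs, '')       # time mean of (mdl - obs)
+        mad  = calc_bias_annual(mdl, obs, 'abs')    # time mean of |mdl - obs|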
+    '''
+    # Calculate mean difference between two fields over time for each grid point
+    # Preprocessing of both obs and model data ensures the absence of missing values
+    diff = t1 - t2
+    if(optn == 'abs'):
+        diff = abs(diff)
+    bias = diff.mean(axis = 0)
+    return bias
+
+
+def calc_bias(t1, t2):
+    '''
+    Calculate mean difference between two fields over time for each grid point
+
+    Classify missing data resulting from multiple times (using threshold
+    data requirement)
+
+    i.e. if the working time unit is monthly data, and we are dealing with
+    multiple months of data then when we show mean of several months, we need
+    to decide what threshold of missing data we tolerate before classifying a
+    data point as missing data.
+    '''
+    t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
+    t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)
+
+    diff = t1 - t2
+    bias = diff.mean(axis = 0)
+
+    # Set mask for bias metric using missing data in obs or model data series
+    #   i.e. if obs contains more than the threshold fraction of missing data
+    #   then classify time average bias as missing data for that location.
+    bias = ma.masked_array(bias.data, np.logical_or(t1Mask, t2Mask))
+    return bias
+
+
+def calc_bias_dom(t1, t2):
+    '''
+    Calculate domain mean difference between two fields over time
+    '''
+    diff = t1 - t2
+    bias = diff.mean()
+    return bias
+
+
+def calc_difference(t1, t2):
+    '''
+    Calculate mean difference between two fields over time for each grid point
+    '''
+    print 'Calculating difference'
+    diff = t1 - t2
+    return diff
+
+
+def calc_mae(t1, t2):
+    '''
+    Calculate mean absolute difference between two fields over time for each grid point
+
+    Classify missing data resulting from multiple times (using threshold
+    data requirement)
+
+    i.e. if the working time unit is monthly data, and we are dealing with
+    multiple months of data then when we show mean of several months, we need
+    to decide what threshold of missing data we tolerate before classifying
+    a data point as missing data.
+    '''
+    t1Mask = process.create_mask_using_threshold(t1, threshold = 0.75)
+    t2Mask = process.create_mask_using_threshold(t2, threshold = 0.75)
+
+    diff = t1 - t2
+    adiff = abs(diff)
+
+    mae = adiff.mean(axis = 0)
+
+    # Set mask for mae metric using missing data in obs or model data series
+    #   i.e. if obs contains more than the threshold fraction of missing data
+    #   then classify time average mae as missing data for that location.
+    mae = ma.masked_array(mae.data, np.logical_or(t1Mask, t2Mask))
+    return mae
+
+
+def calc_mae_dom(t1, t2):
+    '''
+    Calculate domain mean absolute difference between two fields over time
+    '''
+    diff = t1 - t2
+    adiff = abs(diff)
+    mae = adiff.mean()
+    return mae
+
+
+def calc_rms(t1, t2):
+    '''
+    Calculate RMS difference between two fields over time for each grid point
+    '''
+    diff = t1 - t2
+    sqdiff = diff ** 2
+    msd = sqdiff.mean(axis = 0)
+    rms = np.sqrt(msd)
+    return rms
+
+
+def calc_rms_dom(t1, t2):
+    '''
+    Calculate RMS differences between two fields
+    '''
+    diff = t1 - t2
+    sqdiff = diff ** 2
+    msd = sqdiff.mean()
+    rms = np.sqrt(msd)
+    return rms
+
+
+def calc_temporal_stdv(t1):
+    '''
+    Calculate the temporal standard deviation.
+
+    Input:
+        t1 - data array of any shape
+
+    Output:
+        A 2-D array of temporal standard deviation
+    '''
+    # TODO Make sure the first dimension of t1 is the time axis.
+    stdv = t1.std(axis = 0)
+    return stdv
+
+
+def calc_temporal_anom_cor(mD, oD):
+    '''
+    Calculate the temporal anomaly correlation.
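+
+    (The quantity computed below is
+        r = sum_t[(mD_t - mo)(oD_t - mo)] / sqrt(sum_t(mD_t - mo)^2 * sum_t(oD_t - mo)^2),
+    where mo is the time mean of the observations; anomalies of both fields
+    are taken about the observed mean.)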
+
+    Assumption(s):
+        The first dimension of mD and oD is the time axis.
+
+    Input:
+        mD - model data array of any shape
+        oD - observation data array of any shape
+
+    Output:
+        A 2-D array of time series pattern correlation coefficients at each grid point.
+
+    REF: 277-281 in Stat methods in atmos sci by Wilks, 1995, Academic Press, 467pp.
+    '''
+    mo = oD.mean(axis = 0)
+    nt = oD.shape[0]
+    deno1 = ((mD - mo) * (mD - mo)).sum(axis = 0)
+    deno2 = ((oD - mo) * (oD - mo)).sum(axis = 0)
+    patcor = ((mD - mo) * (oD - mo)).sum(axis = 0) / sqrt(deno1 * deno2)
+    return patcor
+
+
+def calc_spatial_anom_cor(mD, oD):
+    '''
+    Calculate anomaly correlation between two 2-D arrays.
+
+    Input:
+        mD - 2-D array of model data
+        oD - 2-D array of observation data
+
+    Output:
+        The anomaly correlation between the two input arrays.
+    '''
+    mo = oD.mean()
+    d1 = ((mD - mo) * (mD - mo)).sum()
+    d2 = ((oD - mo) * (oD - mo)).sum()
+    patcor = ((mD - mo) * (oD - mo)).sum() / sqrt(d1 * d2)
+    return patcor
+
+
+def calc_temporal_pat_cor(t1, t2):
+    '''
+    Calculate the Temporal Pattern Correlation
+
+    Input::
+        t1 - 3d array of model data
+        t2 - 3d array of obs data
+
+    Output::
+        2d array of time series pattern correlation coefficients at each grid point.
+
+    **Note:** std_dev is standardized on 1 degree of freedom
+    '''
+    mt1 = t1.mean(axis = 0)
+    mt2 = t2.mean(axis = 0)
+    nt = t1.shape[0]
+    sigma_t1 = t1.std(axis = 0, ddof = 1)
+    sigma_t2 = t2.std(axis = 0, ddof = 1)
+    patcor = ((((t1 - mt1) * (t2 - mt2)).sum(axis = 0)) / (nt)) / (sigma_t1 * sigma_t2)
+
+    return patcor
+
+
+def calc_spatial_pat_cor(t1, t2):
+    '''
+    Calculate pattern correlation between 2-D arrays.
+    6/10/2013: JK: Enforce both t1 & t2 have the identical mask before calculating std and corr
+
+    Input:
+        t1 - 2-D array of model data
+        t2 - 2-D array of observation data
+
+    Output:
+        Pattern correlation between two input arrays.
+    '''
+    msk1 = ma.getmaskarray(t1)
+    msk2 = ma.getmaskarray(t2)
+    t1 = ma.masked_array(t1.data, np.logical_or(msk1, msk2))
+    t2 = ma.masked_array(t2.data, np.logical_or(msk1, msk2))
+    nPts = ma.count(t1)  # number of unmasked grid points
+    mt1 = t1.mean()
+    mt2 = t2.mean()
+    st1 = t1.std()
+    st2 = t2.std()
+    patcor = ((t1 - mt1) * (t2 - mt2)).sum() / (nPts * st1 * st2)
+    return patcor
+
+
+def calc_pat_cor2D(t1, t2, nT):
+    '''
+    Calculate the pattern correlation between 3-D input arrays.
+
+    Input:
+        t1 - 3-D array of model data
+        t2 - 3-D array of observation data
+        nT - expected number of time levels, used as a consistency check
+             against t1.shape[0]
+
+    Output:
+        1-D array (time series) of pattern correlation coefficients.
+    '''
+    nt = t1.shape[0]
+    if(nt != nT):
+        print 'input time levels do not match: Exit', nT, nt
+        return -1
+    # store results in list for convenience (then convert to numpy array at the end)
+    patcor = []
+    for t in xrange(nt):
+        mt1 = t1[t, :, :].mean()
+        mt2 = t2[t, :, :].mean()
+        sigma_t1 = t1[t, :, :].std()
+        sigma_t2 = t2[t, :, :].std()
+        # TODO: make means and standard deviations weighted by grid box area.
+        patcor.append((((((t1[t, :, :] - mt1) * (t2[t, :, :] - mt2)).sum()) /
+                     (t1.shape[1] * t1.shape[2])) / (sigma_t1 * sigma_t2)))
+        print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
+    # TODO: deal with missing data appropriately, i.e. mask out grid points
+    # with missing data above tolerance level
+    # convert from list into numpy array
+    patcor = np.array(patcor)
+    print patcor.shape
+    return patcor
+
+
+def calc_pat_cor(dataset_1, dataset_2):
+    '''
+    Purpose: Calculate the Pattern Correlation Timeseries
+
+    Assumption(s)::
+        Both dataset_1 and dataset_2 are the same shape.
+        * lat, lon must match up
+        * time steps must align (i.e. months vs. months)
+
+    Input::
+        dataset_1 - 3d (time, lat, lon) array of data
+        dataset_2 - 3d (time, lat, lon) array of data
+
+    Output:
+        patcor - a 1d array (time series) of pattern correlation coefficients.
+
+    **Note:** Standard deviation is using 1 degree of freedom.  Debugging print
+    statements to show the difference the n-1 makes.
+    http://docs.scipy.org/doc/numpy/reference/generated/numpy.std.html
+    6/17/2013 JK: Add an option for 1-d arrays
+    '''
+
+    # TODO: Add in try block to ensure the shapes match
+
+    nDim1 = dataset_1.ndim
+    nDim2 = dataset_2.ndim
+    if nDim1 != nDim2:
+        print 'dimension mismatch in calc_pat_cor: exit', nDim1, nDim2
+        sys.exit()
+
+    if nDim1 == 1:
+        mt1 = dataset_1.mean()
+        mt2 = dataset_2.mean()
+        nt = dataset_1.shape[0]
+        sigma_t1 = dataset_1.std()
+        sigma_t2 = dataset_2.std()
+        patcor = ((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / (nt * sigma_t1 * sigma_t2)
+
+    elif nDim1 == 2:
+        # find mean and std_dev
+        mt1 = dataset_1.mean()
+        mt2 = dataset_2.mean()
+        ny = dataset_1.shape[0]
+        nx = dataset_1.shape[1]
+        sigma_t1 = dataset_1.std()
+        sigma_t2 = dataset_2.std()
+        patcor = ((dataset_1 - mt1) * (dataset_2 - mt2)).sum() / ((ny * nx) * (sigma_t1 * sigma_t2))
+
+    elif nDim1 == 3:
+        nt = dataset_1.shape[0]
+        ny = dataset_1.shape[1]
+        nx = dataset_1.shape[2]
+        # store results in list for convenience (then convert to numpy array)
+        patcor = []
+        for t in xrange(nt):
+            # find mean and std_dev
+            mt1 = dataset_1[t, :, :].mean()
+            mt2 = dataset_2[t, :, :].mean()
+            sigma_t1 = dataset_1[t, :, :].std(ddof = 1)
+            sigma_t2 = dataset_2[t, :, :].std(ddof = 1)
+            # TODO: make means and standard deviations weighted by grid box area.
+            # Equation from Santer_et_al 1995
+            #   patcor = (1/(N*M_std*O_std))*sum((M_i-M_bar)*(O_i-O_bar))
+            patcor.append((((((dataset_1[t, :, :] - mt1) * (dataset_2[t, :, :] - mt2)).sum()) / (ny * nx)) / (sigma_t1 * sigma_t2)))
+            print t, mt1.shape, mt2.shape, sigma_t1.shape, sigma_t2.shape, patcor[t]
+        # TODO: deal with missing data appropriately, i.e. mask out grid points
+        # with missing data above tolerance level
+        # convert from list into numpy array
+        patcor = np.array(patcor)
+
+    print patcor.shape, patcor
+    return patcor
+
+
+def calc_anom_corn(dataset_1, dataset_2, climatology = None):
+    '''
+    Calculate the anomaly correlation.
+
+    Input:
+        dataset_1 - First input dataset
+        dataset_2 - Second input dataset
+        climatology - Optional climatology input array.  Assumption is that it is for
+                      the same time period by default.
+
+    Output:
+        The anomaly correlation.
+    '''
+    # TODO: Update docstring with actual useful information
+
+    # store results in list for convenience (then convert to numpy array)
+    anomcor = []
+    nt = dataset_1.shape[0]
+    # prompt for the third file, i.e. climo file...
+    # include making sure the lat, lon and times are ok for comparison
+    # find the climo in here and then using for eg, if 100 yrs
+    # is given for the climo file, but only looking at 10yrs
+    # ask if want to input climo dataset for use....if no, call previous
+
+    # default to the observations; switch to the climatology only if the
+    # user opts in (this also guarantees base_dataset is always defined)
+    base_dataset = dataset_2
+    if climatology is not None:
+        climoFileOption = raw_input('Would you like to use the full observation dataset as \
+the climatology in this calculation? [y/n] \n>')
+        if climoFileOption == 'y':
+            base_dataset = climatology
+    for t in xrange(nt):
+        mean_base = base_dataset[t, :, :].mean()
+        anomcor.append((((dataset_1[t, :, :] - mean_base) * (dataset_2[t, :, :] - mean_base)).sum()) /
+                       np.sqrt(((dataset_1[t, :, :] - mean_base) ** 2).sum() *
+                               ((dataset_2[t, :, :] - mean_base) ** 2).sum()))
+        print t, mean_base.shape, anomcor[t]
+
+    # TODO: deal with missing data appropriately, i.e. mask out grid points
+    # with missing data above tolerance level
+
+    # convert from list into numpy array
+    anomcor = np.array(anomcor)
+    print anomcor.shape, anomcor.ndim, anomcor
+    return anomcor
+
+
+def calc_anom_cor(t1, t2):
+    '''
+    Calculate the Anomaly Correlation (Deprecated)
+    '''
+
+    nt = t1.shape[0]
+
+    # store results in list for convenience (then convert to numpy
+    # array at the end)
+    anomcor = []
+    for t in xrange(nt):
+
+        mt2 = t2[t, :, :].mean()
+
+        sigma_t1 = t1[t, :, :].std(ddof = 1)
+        sigma_t2 = t2[t, :, :].std(ddof = 1)
+
+        # TODO: make means and standard deviations weighted by grid box area.
+
+        anomcor.append(((((t1[t, :, :] - mt2) * (t2[t, :, :] - mt2)).sum()) /
+                       (t1.shape[1] * t1.shape[2])) / (sigma_t1 * sigma_t2))
+
+        print t, mt2.shape, sigma_t1.shape, sigma_t2.shape, anomcor[t]
+
+    # TODO: deal with missing data appropriately, i.e. mask out grid points with
+    # missing data above tolerance level
+
+    # convert from list into numpy array
+    anomcor = np.array(anomcor)
+    print anomcor.shape, anomcor.ndim, anomcor
+    return anomcor
+
+
+def calc_nash_sutcliff(dataset_1, dataset_2):
+    '''
+    Routine to calculate the Nash-Sutcliff coefficient of efficiency (E)
+
+    Assumption(s)::
+        Both dataset_1 and dataset_2 are the same shape.
+        * lat, lon must match up
+        * time steps must align (i.e. months vs. months)
+
+    Input::
+        dataset_1 - 3d (time, lat, lon) array of data
+        dataset_2 - 3d (time, lat, lon) array of data
+
+    Output:
+        nashcor - 1d array aligned along the time dimension of the input
+                  datasets.  Time Series of Nash-Sutcliff Coefficient of efficiency
+    '''
+
+    nt = dataset_1.shape[0]
+    nashcor = []
+    for t in xrange(nt):
+        mean_dataset_2 = dataset_2[t, :, :].mean()
+
+        nashcor.append(1 - ((((dataset_2[t, :, :] - dataset_1[t, :, :]) ** 2).sum()) /
+                       ((dataset_2[t, :, :] - mean_dataset_2) ** 2).sum()))
+
+        print t, mean_dataset_2.shape, nashcor[t]
+
+    nashcor = np.array(nashcor)
+    print nashcor.shape, nashcor.ndim, nashcor
+    return nashcor
+
+
+def calc_pdf(dataset_1, dataset_2):
+    '''
+    Routine to calculate a normalized Probability Distribution Function with
+    bins set according to data range.  Equation from Perkins et al. 2007:
+
+        PS = sum(min(Z_O_i, Z_M_i))
+
+    where Z is the distribution (histogram) of the data for either dataset.
+    Called in do_rcmes_processing_sub.py.
+
+    Input::
+        dataset_1 - 3d (time, lat, lon) array of model data; e.g. for the time
+                    period 199001010000 - 199201010000, annual means (opt 1)
+                    give shape (2, lat, lon) and monthly means (opt 2) give
+                    shape (24, lat, lon)
+        dataset_2 - 3d (time, lat, lon) array of observation data
+        The number of bins to use and the bin edges (min and max) are read
+        from user input at run time.
+
+    Output::
+        A single float: the normalized PDF similarity score for the period.
+    '''
+    # lists to store PDFs of modelData and obsData
+    pdf_mod = []
+    pdf_obs = []
+    # float to store the final PDF similarity score
+    similarity_score = 0.0
+
+    print 'min modelData', dataset_1[:, :, :].min()
+    print 'max modelData', dataset_1[:, :, :].max()
+    print 'min obsData', dataset_2[:, :, :].min()
+    print 'max obsData', dataset_2[:, :, :].max()
+    # find a distribution for the entire dataset
+    # prompt the user to enter the min, max and number of bin values.
+    # The max, min info above is to help guide the user with these choices
+    print '****PDF input values from user required **** \n'
+    nbins = int(raw_input('Please enter the number of bins to use. \n'))
+    minEdge = float(raw_input('Please enter the minimum value to use for the edge. \n'))
+    maxEdge = float(raw_input('Please enter the maximum value to use for the edge. \n'))
+
+    mybins = np.linspace(minEdge, maxEdge, nbins)
+    print 'nbins is', nbins, 'mybins are', mybins
+
+    # The 'new' kwarg no longer exists in numpy.histogram
+    # (http://docs.scipy.org/doc/numpy/reference/generated/numpy.histogram.html),
+    # so the 'density' parameter is used instead of 'normed=True, new=True'.
+    pdf_mod, edges = np.histogram(dataset_1, bins = mybins, density = True)
+    print 'dataset_1 distribution and edges', pdf_mod, edges
+    pdf_obs, edges = np.histogram(dataset_2, bins = mybins, density = True)
+    print 'dataset_2 distribution and edges', pdf_obs, edges
+
+    # find minimum at each bin between lists
+    i = 0
+    for model_value in pdf_mod:
+        print 'model_value is', model_value, 'pdf_obs[', i, '] is', pdf_obs[i]
+        if model_value < pdf_obs[i]:
+            similarity_score += model_value
+        else:
+            similarity_score += pdf_obs[i]
+        i += 1
+    print 'similarity_score is', similarity_score
+    return similarity_score
+
+
+def calc_stdev(t1):
+    '''
+    Calculate the standard deviation for a given dataset.
+
+    Input:
+        t1 - Dataset to calculate the standard deviation on.
+
+    Output:
+        Array of the spatial standard deviations for each time step in the provided dataset.
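+
+    Example (illustrative sketch):
+        For t1 with shape (24, nLat, nLon), calc_stdev(t1) returns a 1-D
+        array of 24 spatial standard deviations (ddof = 1), one per time step.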
+    '''
+    nt = t1.shape[0]
+    sigma_t1 = []
+    for t in xrange(nt):
+        sigma_t1.append(t1[t, :, :].std(ddof = 1))
+    sigma_t1 = np.array(sigma_t1)
+    print sigma_t1, sigma_t1.shape
+    return sigma_t1
+
+# 6/10/2013 JK: plotting routines are added below
+
+def pow_round(x):
+    '''
+    Function to round x to the nearest power of 10
+    '''
+    return 10 ** (floor(log(x, 10) - log(0.5, 10)))
+
+def calcNiceIntervals(data, nLevs):
+    '''
+    Purpose::
+        Calculates nice intervals between each color level for colorbars
+        and contour plots.  The target minimum and maximum color levels are
+        calculated by taking the minimum and maximum of the distribution
+        after cutting off the tails to remove outliers.
+
+    Input::
+        data - an array of data to be plotted
+        nLevs - an int giving the target number of intervals
+
+    Output::
+        cLevs - A list of floats for the resultant colorbar levels
+    '''
+    # Find the min and max levels by cutting off the tails of the distribution
+    # This mitigates the influence of outliers
+    data = data.ravel()
+    mnLvl = mstats.scoreatpercentile(data, 5)
+    mxLvl = mstats.scoreatpercentile(data, 95)
+    locator = mpl.ticker.MaxNLocator(nLevs)
+    cLevs = locator.tick_values(mnLvl, mxLvl)
+
+    # Make sure the bounds of cLevs are reasonable since sometimes
+    # MaxNLocator gives values outside the domain of the input data
+    cLevs = cLevs[(cLevs >= mnLvl) & (cLevs <= mxLvl)]
+    return cLevs
+
+def calcBestGridShape(nPlots, oldShape):
+    '''
+    Purpose::
+        Calculate a better grid shape in case the user enters more columns
+        and rows than needed to fit a given number of subplots.
+
+    Input::
+        nPlots - an int giving the number of plots that will be made
+        oldShape - a tuple denoting the desired grid shape (nRows, nCols) for arranging
+                   the subplots originally requested by the user.
+
+    Output::
+        newShape - the smallest possible subplot grid shape needed to fit nPlots
+    '''
+    nRows, nCols = oldShape
+    size = nRows * nCols
+    diff = size - nPlots
+    if diff < 0:
+        raise ValueError('gridShape=(%d, %d): Cannot fit enough subplots for data' %(nRows, nCols))
+    else:
+        # If the user enters an excessively large number of
+        # rows and columns for gridShape, automatically
+        # correct it so that it fits only as many plots
+        # as needed
+        while diff >= nCols:
+            nRows -= 1
+            size = nRows * nCols
+            diff = size - nPlots
+
+        # Don't forget to remove unnecessary columns too
+        if nRows == 1:
+            nCols = nPlots
+
+    newShape = nRows, nCols
+    return newShape
+
+def drawPortraitDiagramSingle(data, rowLabels, colLabels, cLevs, fName, fType = 'png',
+                              xLabel = '', yLabel = '', cLabel = '', pTitle = '', cMap = None):
+    '''
+    Purpose::
+        Makes a portrait diagram plot.
+
+    Input::
+        data - 2d array of the field to be plotted
+        rowLabels - list of strings denoting labels for each row
+        colLabels - list of strings denoting labels for each column
+        cLevs - a list of integers or floats specifying the colorbar levels
+        xLabel - a string specifying the x-axis title
+        yLabel - a string specifying the y-axis title
+        cLabel - a string specifying the colorbar title
+        pTitle - a string specifying the plot title
+        fName - a string specifying the filename of the plot
+        fType - an optional string specifying the filetype, default is .png
+        cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+    '''
+    # Set up the colormap if not specified
+    if cMap is None:
+        cMap = plt.cm.RdBu_r
+
+    # Set up figure and axes
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+
+    # Make the portrait diagram
+    norm = mpl.colors.BoundaryNorm(cLevs, cMap.N)
+    cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm)
+
+    # Add colorbar
+    cbar = fig.colorbar(cs, norm = norm, boundaries = cLevs, drawedges = True,
+                        pad = .05)
+    cbar.set_label(cLabel)
+    cbar.set_ticks(cLevs)
+    cbar.ax.xaxis.set_ticks_position("none")
+    cbar.ax.yaxis.set_ticks_position("none")
+
+    # Add grid lines
+    ax.xaxis.set_ticks(np.arange(data.shape[1] + 1))
+    ax.yaxis.set_ticks(np.arange(data.shape[0] + 1))
+    x = (ax.xaxis.get_majorticklocs() - .5)
+    y = (ax.yaxis.get_majorticklocs() - .5)
+    ax.vlines(x, y.min(), y.max())
+    ax.hlines(y, x.min(), x.max())
+
+    # Configure ticks
+    ax.xaxis.tick_bottom()
+    ax.xaxis.set_ticks_position('none')
+    ax.yaxis.set_ticks_position('none')
+    # columns are labeled along x and rows along y, matching drawPortraitDiagram below
+    ax.set_xticklabels(colLabels)
+    ax.set_yticklabels(rowLabels)
+
+    # Add labels and title
+    ax.set_xlabel(xLabel)
+    ax.set_ylabel(yLabel)
+    ax.set_title(pTitle)
+
+    # Save the figure
+    fig.savefig('%s.%s' %(fName, fType))
+    plt.show()
+
+def drawPortraitDiagram(dataset, rowLabels, colLabels, fName, fType = 'png',
+                        gridShape = (1, 1), xLabel = '', yLabel = '', cLabel = '',
+                        pTitle = '', subTitles = None, cMap = None, cLevs = None,
+                        nLevs = 10, extend = 'neither'):
+    '''
+    Purpose::
+        Makes a portrait diagram plot.
+
+    Input::
+        dataset - 3d array of the field to be plotted (nT, nRows, nCols)
+        rowLabels - a list of strings denoting labels for each row
+        colLabels - a list of strings denoting labels for each column
+        fName - a string specifying the filename of the plot
+        fType - an optional string specifying the output filetype
+        xLabel - an optional string specifying the x-axis title
+        yLabel - an optional string specifying the y-axis title
+        cLabel - an optional string specifying the colorbar title
+        pTitle - a string specifying the plot title
+        subTitles - an optional list of strings specifying the title for each subplot
+        cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+        cLevs - an optional list of ints or floats specifying colorbar levels
+        nLevs - an optional integer specifying the target number of contour levels if
+                cLevs is None
+        extend - an optional string to toggle whether to place arrows at the colorbar
+                 boundaries.  Default is 'neither', but can also be 'min', 'max', or
+                 'both'.  Will be automatically set to 'both' if cLevs is None.
+    '''
+    # Handle the single plot case.
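+    # (A 2-D field is treated as a one-panel stack below; a 3-D array whose
+    # leading dimension is already 1 needs no reshaping.)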
+    if dataset.ndim == 2:
+        dataset = dataset.reshape(1, *dataset.shape)
+
+    nPlots = dataset.shape[0]
+
+    # Make sure gridShape is compatible with input data
+    gridShape = calcBestGridShape(nPlots, gridShape)
+
+    # Row and Column labels must be consistent with the shape of
+    # the input data too
+    nRows, nCols = dataset.shape[1:]
+    if len(rowLabels) != nRows or len(colLabels) != nCols:
+        raise ValueError('rowLabels and colLabels must have %d and %d elements respectively' %(nRows, nCols))
+
+    # Set up the colormap if not specified
+    if cMap is None:
+        cMap = plt.cm.coolwarm
+
+    # Set up the figure
+    fig = plt.figure()
+    fig.set_size_inches((8.5, 11.))
+    fig.dpi = 300
+
+    # Make the subplot grid
+    grid = ImageGrid(fig, 111,
+                     nrows_ncols = gridShape,
+                     axes_pad = 0.4,
+                     share_all = True,
+                     aspect = False,
+                     add_all = True,
+                     ngrids = nPlots,
+                     label_mode = "all",
+                     cbar_mode = 'single',
+                     cbar_location = 'bottom',
+                     cbar_pad = '3%',
+                     cbar_size = .15
+                     )
+
+    # Calculate colorbar levels if not given
+    if cLevs is None:
+        # Cut off the tails of the distribution
+        # for more representative colorbar levels
+        cLevs = calcNiceIntervals(dataset, nLevs)
+        extend = 'both'
+
+    norm = mpl.colors.BoundaryNorm(cLevs, cMap.N)
+
+    # Do the plotting
+    for i, ax in enumerate(grid):
+        data = dataset[i]
+        cs = ax.matshow(data, cmap = cMap, aspect = 'auto', origin = 'lower', norm = norm)
+
+        # Add grid lines
+        ax.xaxis.set_ticks(np.arange(data.shape[1] + 1))
+        ax.yaxis.set_ticks(np.arange(data.shape[0] + 1))
+        x = (ax.xaxis.get_majorticklocs() - .5)
+        y = (ax.yaxis.get_majorticklocs() - .5)
+        ax.vlines(x, y.min(), y.max())
+        ax.hlines(y, x.min(), x.max())
+
+        # Configure ticks
+        ax.xaxis.tick_bottom()
+        ax.xaxis.set_ticks_position('none')
+        ax.yaxis.set_ticks_position('none')
+        ax.set_xticklabels(colLabels, fontsize = 'xx-small')
+        ax.set_yticklabels(rowLabels, fontsize = 'xx-small')
+
+        # Add axes title
+        if subTitles is not None:
+            ax.text(0.5, 1.04, subTitles[i], va = 'center', ha = 'center',
+                    transform = ax.transAxes, fontsize = 'small')
+
+    # Add colorbar
+    cbar = fig.colorbar(cs, cax = ax.cax, norm = norm, boundaries = cLevs, drawedges = True,
+                        extend = extend, orientation = 'horizontal')
+    cbar.set_label(cLabel)
+    cbar.set_ticks(cLevs)
+    cbar.ax.xaxis.set_ticks_position("none")
+    cbar.ax.yaxis.set_ticks_position("none")
+
+    # This is an ugly hack to make the title show up at the correct height.
+    # Basically save the figure once to achieve tight layout and calculate
+    # the adjusted heights of the axes, then draw the title slightly above
+    # that height and save the figure again
+    fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+    ymax = 0
+    for ax in grid:
+        bbox = ax.get_position()
+        ymax = max(ymax, bbox.ymax)
+
+    # Add figure title and axes labels
+    fig.text(.51, .14, xLabel, va = 'center', ha = 'center', rotation = 'horizontal')
+    fig.text(.08, .53, yLabel, va = 'center', ha = 'center', rotation = 'vertical')
+    fig.suptitle(pTitle, y = ymax + .04, fontsize = 16)
+    fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+    plt.show()
+    fig.clf()
+
+def taylorDiagram(pltDat, pltTit, pltFileName, refName, legendPos, radMax):
+    '''
+    Draw a Taylor diagram
+    '''
+    stdref = 1.          # Standard reference value
+    rect = 111           # Subplot setting and location
+    markerSize = 6
+    fig = plt.figure()
+    fig.suptitle(pltTit)     # Plot title
+    dia = TaylorDiagram(stdref, fig = fig, radMax = radMax, rect = rect, label = refName)
+    for i, (stddev, corrcoef, name) in enumerate(pltDat):
+        dia.add_sample(stddev, corrcoef, marker = '$%d$' % (i + 1), ms = markerSize, label = name)
+    # Add polylines to mark a range specified by input data - to be implemented
+    #circular_line = dia.add_stddev_contours(0.959, 1, 0.965)
+    #circular_line = dia.add_stddev_contours(1.1, 1, 0.973)
+    #straight_line = dia.add_contours(0.959, 1, 1.1, 1)
+    #straight_line = dia.add_contours(0.959, 0.965, 1.1, 0.973)
+    # loc: 1='upper right', 2='upper left' or specified in the calling program via "legendPos"
+    l = fig.legend(dia.samplePoints, [p.get_label() for p in dia.samplePoints], handlelength = 0., prop = {'size': 10}, numpoints = 1, loc = legendPos)
+    l.draw_frame(False)
+    plt.savefig(pltFileName)
+    plt.show()
+    pltDat = 0.
+
+def drawTimeSeriesSingle(dataset, times, labels, fName, fType = 'png', xLabel = '', yLabel = '', pTitle = '',
+                         legendPos = 'upper center', legendFrameOn = False, yearLabels = True, yscale = 'linear'):
+    '''
+    Purpose::
+        Function to draw a time series plot
+
+    Input::
+        dataset - a list of arrays for each dataset as a time series
+        times - a list of python datetime objects
+        labels - a list of strings with the names of each dataset
+        fName - a string specifying the filename of the plot
+        fType - an optional string specifying the output filetype
+        xLabel - a string specifying the x-axis title
+        yLabel - a string specifying the y-axis title
+        pTitle - a string specifying the plot title
+        legendPos - an optional string or tuple of floats for determining
+                    the position of the legend
+        legendFrameOn - optional bool to toggle drawing the frame around
+                        the legend
+        yearLabels - optional bool to toggle drawing year labels on the x-axis
+        yscale - optional string for setting the y-axis scale, 'linear' for linear
+                 and 'log' for log base 10.
+    '''
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+
+    if not yearLabels:
+        xfmt = mpl.dates.DateFormatter('%b')
+        ax.xaxis.set_major_formatter(xfmt)
+
+    ax.set_xlabel(xLabel)
+    ax.set_ylabel(yLabel)
+    ax.set_title(pTitle)
+
+    # Set up list of lines for legend
+    lines = []
+    ymin, ymax = 0, 0
+
+    # Plot each dataset
+    for data in dataset:
+        line = ax.plot_date(times, data, '', linewidth = 2)
+        lines.extend(line)
+        cmin, cmax = data.min(), data.max()
+        if ymin > cmin:
+            ymin = cmin
+        if ymax < cmax:
+            ymax = cmax
+
+    # Add a bit of padding so lines don't touch bottom and top of the plot
+    pad = (ymax - ymin) * 0.1
+    ymin = ymin - pad
+    ymax = ymax + pad
+    ax.set_ylim((ymin, ymax))
+
+    # Set the y-axis scale
+    ax.set_yscale(yscale)
+
+    # Create the legend
+    ax.legend((lines), labels, loc = legendPos, ncol = 10, fontsize = 'x-small',
+              frameon = legendFrameOn)
+    fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight')
+    plt.show()
+    fig.clf()
+
+
+def pltXY(x, y, lineLabel, lineTyp, pltTit, xLabel, yLabel, pltName, xmin, xmax, deltaX, ymin, ymax, deltaY, legendPos, xScale, yScale):
+    """
+    The default drawing order for axes is patches, lines, text.  This order is determined by the zorder attribute.
+    The following defaults are set:
+        Artist                     Z-order
+        Patch / PatchCollection    1
+        Line2D / LineCollection    2
+        Text                       3
+    You can change the order for individual artists by setting the zorder.  Any individual plot() call can set a value
+    for the zorder of that particular item.
+    In the first subplot below, the lines are drawn above the patch collection from the scatter, which is the default.
+    In the subplot below, the order is reversed.
+    The second figure shows how to control the zorder of individual lines.
+
+    Arguments
+        x (nX)    : np array: the points along the x axis
+        y (nX,nY) : np array: the y values to be plotted
+        lineLabel : list(nY): labels for individual y data
+        pltTit    : Text: plot title
+        xLabel    : Text: x-axis label
+        yLabel    : Text: y-axis label
+        pltName   : Text: name of the plot file
+    3/28/2013 Jinwon Kim: Modification of a routine in the matplotlib gallery
+    """
+    lineColors = ['k', 'b', 'r', 'g', 'c', 'm', 'y', '0.6', '0.7', '0.8', '0.9', '1.0']
+    nX = x.size
+    nY = len(lineLabel)
+    lineColors[nY - 1] = 'b'
+    fig = plt.figure()
+    ax = fig.add_subplot(1, 1, 1)
+    for n in np.arange(nY):
+        plot(x, y[n, :], linewidth = 1, color = lineColors[n], linestyle = lineTyp[n], label = lineLabel[n], zorder = 10)
+    xlabel(xLabel, fontsize = 10); ylabel(yLabel, fontsize = 10)
+    if xmax > xmin:
+        plt.xlim(xmin, xmax)
+        ticks = frange(xmin, xmax, npts = (xmax - xmin) / deltaX + 1)
+        ax.xaxis.set_ticks(ticks, minor = False)
+    if ymax > ymin:
+        plt.ylim(ymin, ymax)
+        ticks = frange(ymin, ymax, npts = (ymax - ymin) / deltaY + 1)
+        ax.yaxis.set_ticks(ticks, minor = False)
+    ax.set_title(pltTit)
+    ax.xaxis.tick_bottom(); ax.yaxis.tick_left()  # put tick marks only on the left (for y) and bottom (for x) axis
+    if xScale == 'log':
+        ax.set_xscale('log')
+    else:
+        ax.set_xscale('linear')
+    if yScale == 'log':
+        ax.set_yscale('log')
+    else:
+        ax.set_yscale('linear')
+    if(nY > 1):
+        l = legend(prop = {'size': 10}, loc = 'best')
+        l.set_zorder(20)  # put the legend on top
+    plt.savefig(pltName)
+    show()
+    # release work arrays
+    x = 0.; y = 0.
+
+
+def pltSca1F(x, y, pltName, xLabel, yLabel, pmin, pmax, delP, pTit, pFname, xScale, yScale):
+    #*************************************#
+    # Plot a single-frame scatter diagram #
+    #*************************************#
+    fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)
+    lTyp = 'o'
+    plot(x, y, lTyp, c = 'r', label = pltName)  # label the points so the legend below has an entry
+    if pmax > pmin:
+        ticks = frange(pmin, pmax, npts = (pmax - pmin) / delP + 1)
+        plt.xlim(pmin, pmax); ax.xaxis.set_ticks(ticks, minor = False)
+        plt.ylim(pmin, pmax); ax.yaxis.set_ticks(ticks, minor = False)
+    ax.set_title(pTit)
+    if xScale == 'log':
+        ax.set_xscale('log')
+    else:
+        ax.set_xscale('linear')
+    if yScale == 'log':
+        ax.set_yscale('log')
+    else:
+        ax.set_yscale('linear')
+    xlabel(xLabel, fontsize = 10); ylabel(yLabel, fontsize = 10)
+    l = legend(prop = {'size': 8}, loc = 'best')
+    l.set_zorder(20)
+    plt.savefig(pFname)
+    show()
+    x = 0.; y = 0.
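+# Illustrative usage sketch for pltSca1F (editorial example; the data below
+# are synthetic and the file name is hypothetical):
+#     x = np.random.randn(100)
+#     y = x + 0.3 * np.random.randn(100)
+#     pltSca1F(x, y, 'mdl vs obs', 'obs', 'model', -3., 3., 1.,
+#              'Scatter demo', 'scatter_demo.png', 'linear', 'linear')
+# draws a red 'o' scatter of y against x with identical x/y limits and a tick
+# spacing of delP, then saves the figure to scatter_demo.png.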
+
+
+def pltSca6F(x, dName, pmin, pmax, delP, pTitle, pFname, xScale, yScale):
+    #****************************************************#
+    # Plot up to 6 frames (3 rows x 2 columns) on a page #
+    #****************************************************#
+    nPlt = x.shape[0]
+    if pmax > pmin:
+        ticks = frange(pmin, pmax, npts = (pmax - pmin) / delP + 1)
+    if nPlt > 6:
+        print 'frames exceed 6: return'
+        return
+    nrows = 3; ncols = 2; npmax = nrows * ncols; lTyp = 'o'; xlabcol = 'black'; ylabcol = 'green'
+    fig = plt.figure()
+    plt.subplots_adjust(hspace = 0.3, wspace = 0.2)
+    for n in range(1, nPlt):
+        pid = n % npmax
+        if pid == 0:
+            pid = npmax
+        ax = fig.add_subplot(nrows, ncols, pid)
+        #ax.set_title(pTitle[n-1], fontsize = 6)
+        if xScale == 'log':
+            ax.set_xscale('log')
+        else:
+            ax.set_xscale('linear')
+        if yScale == 'log':
+            ax.set_yscale('log')
+        else:
+            ax.set_yscale('linear')
+        plot(x[0, :], x[n, :], lTyp, label = pTitle[n - 1], c = 'r')
+        plot([pmin, pmax], [pmin, pmax], c = 'b')  # 1:1 reference line
+        l = legend(prop = {'size': 8}, loc = 'best')
+        l.set_zorder(20)
+        xlabel(dName[0], fontsize = 10); ylabel(dName[n], fontsize = 10)
+        if pmax > pmin:
+            plt.xlim(pmin, pmax); ax.xaxis.set_ticks(ticks, minor = False)
+            plt.ylim(pmin, pmax); ax.yaxis.set_ticks(ticks, minor = False)
+        for label in ax.xaxis.get_ticklabels():
+            label.set_color(xlabcol)
+            label.set_rotation(0)
+            label.set_fontsize(8)
+        for label in ax.yaxis.get_ticklabels():
+            label.set_color(ylabcol)
+            label.set_rotation(0)
+            label.set_fontsize(8)
+    plt.savefig(pFname)
+    show()
+    x = 0.
+
+def drawContourMapSingle(data, lats, lons, cLevs, fName, fType = 'png',
+                         cLabel = '', pTitle = '', cMap = None, nParallels = 5, nMeridians = 5):
+    '''
+    Purpose::
+        Plots a filled contour map.
+
+    Input::
+        data - 2d array of the field to be plotted with shape (nLat, nLon)
+        lats - array of latitudes
+        lons - array of longitudes
+        cLevs - A list of ints or floats specifying contour levels
+        fName - a string specifying the filename of the plot
+        fType - an optional string specifying the filetype, default is .png
+        cLabel - an optional string specifying the colorbar title
+        pTitle - an optional string specifying plot title
+        cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+        nParallels - an optional int for the number of parallels to draw
+        nMeridians - an optional int for the number of meridians to draw
+    '''
+    # Set up the colormap if not specified
+    if cMap is None:
+        cMap = plt.cm.RdBu_r
+
+    # Set up the figure
+    fig = plt.figure()
+    ax = fig.add_subplot(111)
+
+    # Determine the map boundaries and construct a Basemap object
+    lonMin = lons.min()
+    lonMax = lons.max()
+    latMin = lats.min()
+    latMax = lats.max()
+    m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
+                llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax)
+
+    # Draw the borders for coastlines and countries
+    m.drawcoastlines(linewidth = 1)
+    m.drawcountries(linewidth = .75)
+
+    # Draw parallels / meridians.
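+    # (In Basemap, the 'labels' flags select which sides of the map are
+    # annotated, in the order [left, right, top, bottom].)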
+    m.drawmeridians(np.linspace(lonMin, lonMax, nMeridians), labels = [0, 0, 0, 1])
+    m.drawparallels(np.linspace(latMin, latMax, nParallels), labels = [1, 0, 0, 1])
+
+    # Convert lats and lons to projection coordinates
+    if lats.ndim == 1 and lons.ndim == 1:
+        lons, lats = np.meshgrid(lons, lats)
+    x, y = m(lons, lats)
+
+    # Plot data with filled contours
+    cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs)
+
+    # Add colorbar
+    cbar = m.colorbar(cs, drawedges = True, pad = '2%', size = '3%')
+    cbar.set_label(cLabel)
+    cbar.set_ticks(cLevs)
+    cbar.ax.xaxis.set_ticks_position("none")
+    cbar.ax.yaxis.set_ticks_position("none")
+
+    # Add title and save the figure
+    ax.set_title(pTitle)
+    fig.savefig('%s.%s' %(fName, fType))
+    show()
+
+def drawCntrMap(data, lon, lat, titles, ncols, pFname):
+    '''
+    This routine is based on PyNGL, i.e., NCAR Graphics.
+    data   - a masked numpy array of data to plot (nT,nY,nX)
+    lon    - longitude (nY,nX)
+    lat    - latitude (nY,nX)
+    titles - array of titles (nT)
+    ncols  - number of columns of the paneled plots; the number of rows is
+             computed internally so that nT = nrows * ncols
+    pFname - name of output postscript file
+    '''
+    wks_type = 'ps'
+    if data.ndim == 2:
+        nT = 1; nrows = 1; ncols = 1
+    elif data.ndim == 3:
+        nT = data.shape[0]
+        if nT % ncols == 0:
+            nrows = nT / ncols
+        else:
+            nrows = nT / ncols + 1
+    # set workstation type (X11, ps, png)
+    res = Ngl.Resources()
+    wks = Ngl.open_wks(wks_type, pFname, res)
+    # set plot resource parameters
+    resources = Ngl.Resources()
+    resources.mpLimitMode = "LatLon"    # Limit the map view.
+    resources.mpMinLonF = lon.min()
+    resources.mpMaxLonF = lon.max()
+    resources.mpMinLatF = lat.min()
+    resources.mpMaxLatF = lat.max()
+    resources.cnFillOn = True
+    resources.cnLineLabelsOn = False    # Turn off line labels.
+    resources.cnInfoLabelOn = False     # Turn off info label.
+    resources.cnLinesOn = False         # Turn off contour lines (only filled colors)
+    resources.sfXArray = lon[:, :]
+    resources.sfYArray = lat[:, :]
+    resources.pmTickMarkDisplayMode = "Never"    # Turn off map tickmarks.
+    resources.mpOutlineBoundarySets = "GeophysicalAndUSStates"
+    resources.mpGeophysicalLineColor = "red"
+    resources.mpUSStateLineColor = "red"
+    resources.mpGeophysicalLineThicknessF = 0.75
+    resources.mpUSStateLineThicknessF = 0.75
+    resources.mpPerimOn = True          # Turn on/off map perimeter
+    resources.mpGridAndLimbOn = True    # Turn on the map grid.
+    resources.nglFrame = False          # Don't advance the frame
+    resources.nglDraw = False
+    plot = []
+    for iT in np.arange(nT):
+        resources.tiMainString = titles[iT]
+        #resources.pmLabelBarDisplayMode = "Never"    # Turn off individual label bars
+        resources.pmLabelBarDisplayMode = "Always"    # Turn on individual label bars
+        if data.ndim == 3:
+            plot.append(Ngl.contour_map(wks, data[iT, :, :], resources))
+        elif data.ndim == 2:
+            plot.append(Ngl.contour_map(wks, data[:, :], resources))
+    panelres = Ngl.Resources()
+    panelres.nglPanelTop = 0.95
+    panelres.nglPanelBottom = 0.05
+    panelres.nglPanelLabelBar = False                 # Turn off the panel labelbar
+    panelres.nglPanelLabelBarLabelFontHeightF = 0.012 # Labelbar font height
+    panelres.nglPanelLabelBarHeightF = 0.03           # Height of labelbar
+    panelres.nglPanelLabelBarWidthF = 0.8             # Width of labelbar
+    panelres.lbLabelFont = "helvetica-bold"           # Labelbar font
+    Ngl.panel(wks, plot[0:nT], [nrows, ncols], panelres)
+    Ngl.destroy(wks)
+    del plot
+    del resources
+    del wks
+    return
+
+def drawContourMap(dataset, lats, lons, fName, fType = 'png', gridShape = (1, 1),
+                   cLabel = '', pTitle = '', subTitles = None, cMap = None,
+                   cLevs = None, nLevs = 10, parallels = None, meridians = None,
+                   extend = 'neither'):
+    '''
+    Purpose::
+        Create a multiple panel contour map plot.
+
+    Input::
+        dataset - 3d array of the field to be plotted with shape (nT, nLat, nLon)
+        lats - array of latitudes
+        lons - array of longitudes
+        fName - a string specifying the filename of the plot
+        fType - an optional string specifying the filetype, default is .png
+        gridShape - optional tuple denoting the desired grid shape (nRows, nCols) for arranging
+                    the subplots.
+        cLabel - an optional string specifying the colorbar title
+        pTitle - an optional string specifying plot title
+        subTitles - an optional list of strings specifying the title for each subplot
+        cMap - an optional matplotlib.LinearSegmentedColormap object denoting the colormap
+        cLevs - an optional list of ints or floats specifying contour levels
+        nLevs - an optional integer specifying the target number of contour levels if
+                cLevs is None
+        parallels - an optional list of ints or floats for the parallels to be drawn
+        meridians - an optional list of ints or floats for the meridians to be drawn
+        extend - an optional string to toggle whether to place arrows at the colorbar
+                 boundaries.  Default is 'neither', but can also be 'min', 'max', or
+                 'both'.  Will be automatically set to 'both' if cLevs is None.
+    '''
+    # Handle the single plot case.  Meridians and Parallels are not labeled for
+    # multiple plots to save space.
+    if dataset.ndim == 2 or (dataset.ndim == 3 and dataset.shape[0] == 1):
+        if dataset.ndim == 2:
+            dataset = dataset.reshape(1, *dataset.shape)
+        nPlots = 1
+        mLabels = [0, 0, 0, 1]
+        pLabels = [1, 0, 0, 1]
+    else:
+        nPlots = dataset.shape[0]
+        mLabels = [0, 0, 0, 0]
+        pLabels = [0, 0, 0, 0]
+
+    # Make sure gridShape is compatible with input data
+    gridShape = calcBestGridShape(nPlots, gridShape)
+
+    # Set up the colormap if not specified
+    if cMap is None:
+        cMap = plt.cm.coolwarm
+
+    # Set up the figure
+    fig = plt.figure()
+    #fig.set_size_inches((8.5, 11.))
+    #fig.dpi = 600
+
+    # Make the subplot grid
+    grid = ImageGrid(fig, 111,
+                     nrows_ncols = gridShape,
+                     axes_pad = 0.3,
+                     share_all = True,
+                     add_all = True,
+                     ngrids = nPlots,
+                     label_mode = "L",
+                     cbar_mode = 'single',
+                     cbar_location = 'bottom',
+                     cbar_pad = '0%'
+                     )
+
+    # Determine the map boundaries and construct a Basemap object
+    lonMin = lons.min()
+    lonMax = lons.max()
+    latMin = lats.min()
+    latMax = lats.max()
+    m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
+                llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l')
+
+    # Convert lats and lons to projection coordinates
+    if lats.ndim == 1 and lons.ndim == 1:
+        lons, lats = np.meshgrid(lons, lats)
+
+    # Calculate contour levels if not given
+    if cLevs is None:
+        # Cut off the tails of the distribution
+        # for more representative contour levels
+        cLevs = calcNiceIntervals(dataset, nLevs)
+        extend = 'both'
+
+    # Create default meridians and parallels
+    if meridians is None:
+        meridians = np.arange(-180, 181, 15)
+    if parallels is None:
+        parallels = np.arange(-90, 91, 15)
+
+    x, y = m(lons, lats)
+    for i, ax in enumerate(grid):
+        # Load the data to be plotted
+        data = dataset[i]
+        m.ax = ax
+
+        # Draw the borders for coastlines and countries
+        m.drawcoastlines(linewidth = 1)
+        m.drawcountries(linewidth = .75)
+
+        # Draw parallels / meridians
+        m.drawmeridians(meridians, labels = mLabels, linewidth = .75)
+        m.drawparallels(parallels, labels = pLabels, linewidth = .75)
+
+        # Draw filled contours
+        cs = m.contourf(x, y, data, cmap = cMap, levels = cLevs, extend = extend)
+
+        # Add title
+        if subTitles is not None:
+            ax.set_title(subTitles[i], fontsize = 'small')
+
+    # Add colorbar
+    cbar = fig.colorbar(cs, cax = ax.cax, drawedges = True, orientation = 'horizontal',
+                        extendfrac = 'auto')
+    cbar.set_label(cLabel)
+    cbar.set_ticks(cLevs)
+    cbar.ax.xaxis.set_ticks_position("none")
+    cbar.ax.yaxis.set_ticks_position("none")
+
+    # This is an ugly hack to make the title show up at the correct height.
+    # Basically save the figure once to achieve tight layout and calculate
+    # the adjusted heights of the axes, then draw the title slightly above
+    # that height and save the figure again
+    fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+    ymax = 0
+    for ax in grid:
+        bbox = ax.get_position()
+        ymax = max(ymax, bbox.ymax)
+
+    # Add figure title
+    fig.suptitle(pTitle, y = ymax + .04, fontsize = 16)
+    fig.savefig('%s.%s' %(fName, fType), bbox_inches = 'tight', dpi = fig.dpi)
+    plt.show()
+    fig.clf()
+
+def drawSubRegions(subRegions, lats, lons, fName, fType = 'png', pTitle = '',
+                   parallels = None, meridians = None, subRegionMasks = None):
+    '''
+    Purpose::
+        Function to draw subregion domain(s) on a map
+
+    Input::
+        subRegions - a list of subRegion objects
+        lats - array of latitudes
+        lons - array of longitudes
+        fName - a string specifying the filename of the plot
+        fType - an optional string specifying the filetype, default is .png
+        pTitle - an optional string specifying plot title
+        parallels - an optional list of ints or floats for the parallels to be drawn
+        meridians - an optional list of ints or floats for the meridians to be drawn
+        subRegionMasks - optional dictionary of boolean arrays for each subRegion
+                         for giving finer control of the domain to be drawn, by default
+                         the entire domain is drawn.
+    '''
+    # Set up the figure
+    fig = plt.figure()
+    fig.set_size_inches((8.5, 11.))
+    fig.dpi = 300
+    ax = fig.add_subplot(111)
+
+    # Determine the map boundaries and construct a Basemap object
+    lonMin = lons.min()
+    lonMax = lons.max()
+    latMin = lats.min()
+    latMax = lats.max()
+    m = Basemap(projection = 'cyl', llcrnrlat = latMin, urcrnrlat = latMax,
+                llcrnrlon = lonMin, urcrnrlon = lonMax, resolution = 'l', ax = ax)
+
+    # Draw the borders for coastlines and countries
+    m.drawcoastlines(linewidth = 1)
+    m.drawcountries(linewidth = .75)
+    m.drawstates()
+
+    # Create default meridians and parallels
+    if meridians is None:
+        meridians = np.arange(-180, 181, 15)
+    if parallels is None:
+        parallels = np.arange(-90, 91, 15)
+
+    # Draw parallels / meridians
+    m.drawmeridians(meridians, labels = [0, 0, 0, 1], linewidth = .75)
+    m.drawparallels(parallels, labels = [1, 0, 0, 1], linewidth = .75)
+
+    # Set up the color scaling
+    cMap = plt.cm.jet
+    norm = mpl.colors.BoundaryNorm(np.arange(1, len(subRegions) + 3), cMap.N)
+
+    # Process the subregions
+    for i, reg in enumerate(subRegions):
+        if subRegionMasks is not None and reg.name in subRegionMasks.keys():
+            domain = (i + 1) * subRegionMasks[reg.name]
+        else:
+            domain = (i + 1) * np.ones((2, 2))
+
+        nLats, nLons = domain.shape
+        domain = ma.masked_equal(domain, 0)
+        regLats = np.linspace(reg.latMin, reg.latMax, nLats)
+        regLons = np.linspace(reg.lonMin, reg.lonMax, nLons)
+        regLons, regLats = np.meshgrid(regLons, regLats)
+
+        # Convert to projection coordinates.  Not really necessary
+        # for cylindrical projections but keeping it here in case we need
+        # support for other projections.
+def metrics_plots(varName, numOBS, numMDL, nT, ngrdY, ngrdX, Times, lons, lats,
+                  allData, dataList, workdir, subRegions, timeStep, fileOutputOption):
+    '''
+    Calculate evaluation metrics and generate plots.
+    '''
+    ##################################################################################################################
+    # Routine to compute evaluation metrics and generate plots
+    #   (1) metric calculation
+    #   (2) plot production
+    # Input:
+    #   numOBS - the number of obs datasets: either 1, or >= 2 when an obs ensemble is added for multi-obs cases
+    #   numMDL - the number of model datasets: either 1, or >= 2 when a model ensemble is added for multi-model cases
+    #   nT - the length of the data in the time dimension
+    #   ngrdY - the length of the data in the Y dimension
+    #   ngrdX - the length of the data in the X dimension
+    #   Times - time stamps
+    #   lons, lats - longitude & latitude values of the data domain (same for model & obs)
+    #   allData - the combined observed and model data (merges obsData & mdlData from the old code)
+    #   dataList - the list of data names (merges obsList and mdlList from the old code)
+    #   workdir - string describing the directory path for storing results and plots
+    #   subRegions - a list of SubRegion objects, or False
+    #   fileOutputOption - the option to write the regridded data to a netCDF file, or not
+    # Output: image files of plots + possibly data
+    #******************************************************
+    # JK2.0: Only the data interpolated temporally and spatially onto the analysis grid
+    #        are passed into this routine. The rest of the processing (e.g., area-averaging)
+    #        is performed here. Do not overwrite obsData[numOBS, nT, ngrdY, ngrdX] &
+    #        mdlData[numMDL, nT, ngrdY, ngrdX]: these are the raw, re-gridded data to be
+    #        used repeatedly for multiple evaluation steps, as desired by an evaluator.
+    # JK2.1: The observed and model data are unified in a single variable for ease of processing.
+    ##################################################################################################################
+
+    print ''
+    print 'Start metrics.py'
+    print ''
+    # JK2.1: the total number of combined (obs + model) datasets
+    numDatasets = numOBS + numMDL
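+    # For example (illustrative numbers): with numOBS = 2 and numMDL = 3,
+    #     obsIDs = range(numOBS)                 # [0, 1]
+    #     mdlIDs = range(numOBS, numDatasets)    # [2, 3, 4]
+    # i.e., allData[k] is an obs dataset for k < numOBS and a model dataset
+    # otherwise (this convention is relied upon when mdlID is built below).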
+
+    #####################################################################################################
+    # JK2.0: Compute evaluation metrics and plots to visualize the results
+    #####################################################################################################
+    # (mp.001) Sub-regions for local timeseries analysis
+    #--------------------------------
+    # Enter the locations of the subregions via screen input;
+    # modify this to add an option to read them in from data file(s)
+    #----------------------------------------------------------------------------------------------------
+    if subRegions:
+        numSubRgn = len(subRegions)
+        subRgnName = [x.name for x in subRegions]
+        subRgnLon0 = [x.lonMin for x in subRegions]
+        subRgnLon1 = [x.lonMax for x in subRegions]
+        subRgnLat0 = [x.latMin for x in subRegions]
+        subRgnLat1 = [x.latMax for x in subRegions]
+    else:
+        print ''
+        ans = raw_input('Calculate area-mean timeseries for subregions? y/n: [n] \n')
+        print ''
+        if ans == 'y':
+            ans = raw_input('Input subregion info interactively? y/n: \n> ')
+            if ans == 'y':
+                numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_interactively()
+            else:
+                print 'Read subregion info from a pre-fabricated text file'
+                ans = raw_input('Read from the default file (workdir + "/sub_regions.txt")? y/n: \n> ')
+                if ans == 'y':
+                    subRgnFileName = workdir + "/sub_regions.txt"
+                else:
+                    subRgnFileName = raw_input('Enter the subRgnFileName to read from \n')
+                print 'subRgnFileName ', subRgnFileName
+                numSubRgn, subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1 = misc.assign_subRgns_from_a_text_file(subRgnFileName)
+            print subRgnName, subRgnLon0, subRgnLon1, subRgnLat0, subRgnLat1
+        else:
+            numSubRgn = 0
+
+    # Compute the area-mean timeseries for all subregions if any are defined.
+    # The number of subregions is usually small, so memory usage is not a concern.
+    dataRgn = np.zeros((numDatasets, numSubRgn, nT))
+
+    if subRegions:
+        print 'Enter area-averaging: allData.shape ', allData.shape
+        print 'Using Latitude/Longitude Mask for Area Averaging'
+        for n in np.arange(numSubRgn):
+            # Define the mask using the regular lat/lon box specified by the user
+            # ('mask = True' marks the area to be EXCLUDED from the average)
+            maskLonMin = subRegions[n].lonMin
+            if maskLonMin > 180.:
+                maskLonMin = maskLonMin - 360.
+            maskLonMax = subRegions[n].lonMax
+            if maskLonMax > 180.:
+                maskLonMax = maskLonMax - 360.
+            maskLatMin = subRegions[n].latMin
+            maskLatMax = subRegions[n].latMax
+            mask = np.logical_or(np.logical_or(lats <= maskLatMin, lats >= maskLatMax),
+                                 np.logical_or(lons <= maskLonMin, lons >= maskLonMax))
+
+            # Calculate the area-weighted average within this region for every dataset
+            for k in np.arange(numDatasets):    # JK2.1: area-average all data
+                Store = []
+                for t in np.arange(nT):
+                    Store.append(process.calc_area_mean(allData[k, t, :, :], lats, lons, mymask = mask))
+                dataRgn[k, n, :] = ma.array(Store[:])
+                Store = []    # release the memory allocated by the temporary variable
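+
+    # Worked example of the exclusion mask above (illustrative numbers): for a
+    # subregion box spanning 30-45N, 110-90W on a 2-d lat/lon grid,
+    #
+    #     lats = np.array([[20., 20.], [40., 40.]])
+    #     lons = np.array([[-100., -80.], [-100., -80.]])
+    #     mask = np.logical_or(np.logical_or(lats <= 30., lats >= 45.),
+    #                          np.logical_or(lons <= -110., lons >= -90.))
+    #     # -> [[True, True], [False, True]]: only the point (40N, 100W)
+    #     #    falls inside the box and enters the area average
+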
+    #-------------------------------------------------------------------------
+    # (mp.002) fileOutputOption: the option to create a netCDF file of the processed
+    #          (re-gridded and regionally-averaged) data for user-specific processing.
+    #          This option is useful for advanced users who need more than the
+    #          metrics and visualization provided in the basic package.
+    #----------------------------------------------------------------------------------------------------
+    print ''
+    if not fileOutputOption:
+        while fileOutputOption not in ['no', 'nc']:
+            fileOutputOption = raw_input('Option for output files of obs/model data: Enter no/nc for none or a netCDF file \n> ').lower()
+    print ''
+
+    # Write a netCDF file for post-processing if desired.
+    # JK2.1: the binary output option has been completely eliminated.
+    if fileOutputOption == 'nc':
+        fileName = '%s/%s_Tseries.nc' % (workdir, varName)
+        if os.path.exists(fileName):
+            print "removing %s from the local filesystem, so it can be replaced..." % (fileName,)
+            os.remove(fileName)
+        files.writeNCfile1(fileName, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions)
+        print 'The regridded obs and model data are written in the netCDF file ', fileName
+
+    #####################################################################################################
+    ##################### Metrics calculation and plotting cycle starts from here ######################
+    #####################################################################################################
+    print ''
+    print 'OBS and MDL data have been prepared for the evaluation step'
+    print ''
+    doMetricsOption = raw_input('Want to calculate metrics and plot them? [y/n]\n> ').lower()
+    if doMetricsOption == 'y':
+        # Assign the variable name and unit to be used in plots
+        print 'The variable to be processed is ', timeStep, ' ', varName
+        pltVarName = raw_input('Enter the variable name to appear in the plot\n> ')
+        pltVarUnit = raw_input('Enter the variable unit to appear in the plot\n> ')
+        print ''
+
+    ########################################################
+    # The loop below terminates when no (more) metrics are #
+    # to be calculated, i.e., when doMetricsOption != 'y'  #
+    ########################################################
+
+    neval = 0
+
+    while doMetricsOption == 'y':
+        neval += 1
+        print ' '
+        print 'neval= ', neval
+        print ' '
+        #--------------------------------
+        # (mp.003) Preparation
+        #----------------------------------------------------------------------------------------------------
+        # Determine the years in the record (YR) and their number (nYR)
+        yy = ma.zeros(nT, 'i')
+        for n in np.arange(nT):
+            yy[n] = Times[n].strftime("%Y")
+        YR = np.unique(yy)
+        yy = 0
+        nYR = len(YR)
+        print 'nYR, YR = ', nYR, YR
+
+        # Select the eval domain: the entire domain (spatial distribution) or regional time series
+        anlDomain = 'n'
+        anlRgn = 'n'
+        print ' '
+        analSelect = int(raw_input('Eval over domain (Enter 0) or time series of selected Sub Regions (Enter 1) \n> '))
+        print ' '
+        if analSelect == 0:
+            anlDomain = 'y'
+        elif analSelect == 1:
+            anlRgn = 'y'
+        else:
+            print 'analSelect= ', analSelect, ' is not a valid option: EXIT'
+            sys.exit()
+
+        #--------------------------------------------------------------------------------------------------------------------
+        # (mp.004) Select the reference and model data to be used in the evaluation step
+        # 6/7/2013: JK4 - unified handling of the ref & mdl datasets allows several different types of
+        #           evaluation (RCMES v2.1), e.g., ref vs. one model; ref vs. all models;
+        #           ref vs. all models + non-ref obs
+        # refID: the ID of the reference dataset against which all data are to be evaluated
+        # mdlID: the list of dataset IDs to be evaluated; for mdlSelect == -2 the list
+        #        also includes the non-reference obs data
+        #--------------------------------------------------------------------------------------------------------------------
+        refID = int(misc.select_data_combined(numDatasets, Times, dataList, 'ref'))
+        mdlSelect = int(misc.select_data_combined(numDatasets, Times, dataList, 'mdl'))
+        mdlID = []
+        # Assign the data IDs to be evaluated. Note that a non-reference obs dataset is
+        # treated like a model dataset. A worked example of the codes follows this block.
+        if mdlSelect >= 0:
+            # a single dataset
+            mdlID.append(mdlSelect)
+        elif mdlSelect == -1:
+            # all model datasets
+            for n in np.arange(numMDL):
+                mdlID.append(n + numOBS)
+        elif mdlSelect == -2:
+            # all datasets except the reference
+            for n in np.arange(refID):
+                mdlID.append(n)
+            for n in range(refID + 1, numDatasets):
+                mdlID.append(n)
+        elif mdlSelect == -3:
+            # all non-reference obs datasets
+            if numOBS == 1:
+                print 'There exists only one reference dataset: EXIT'
+                sys.exit()
+            for n in np.arange(refID):
+                mdlID.append(n)
+            for n in range(refID + 1, numOBS):
+                mdlID.append(n)
+        elif mdlSelect == -4:
+            # an arbitrary list of dataset IDs, entered interactively
+            id4eval = 0    # any number != -9
+            print 'Enter the data id to be evaluated: -9 to stop entering'
+            while id4eval != -9:
+                id4eval = int(raw_input('Enter the data id for evaluation. -9 to stop entry\n> '))
+                if id4eval != -9:
+                    mdlID.append(id4eval)
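+
+        # Worked example (illustrative numbers): with numOBS = 2, numMDL = 3 and
+        # refID = 0, the combined indices are obs = [0, 1] and models = [2, 3, 4], so
+        #     mdlSelect = -1  ->  mdlID = [2, 3, 4]       (all models)
+        #     mdlSelect = -2  ->  mdlID = [1, 2, 3, 4]    (everything but the reference)
+        #     mdlSelect = -3  ->  mdlID = [1]             (non-reference obs only)
+        #     mdlSelect =  3  ->  mdlID = [3]             (a single dataset)
+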
+        refName = dataList[refID]
+        mdlName = []
+        numMdl = len(mdlID)
+        # Construct a short (up to 8-character) name for each selected dataset by
+        # truncating its name at the first blank within the first 8 characters
+        for n in np.arange(numMdl):
+            tname = dataList[mdlID[n]]
+            m = min(len(tname), 8)
+            for k in np.arange(m):
+                if tname[k] == ' ':
+                    break
+                elif k == m - 1:
+                    k = m
+            mdlName.append(tname[0:k])
+        print 'selected reference and model data for evaluation= ', refName, mdlName
+
+        #--------------------------------
+        # (mp.005) Spatial distribution analysis/evaluation (anlDomain = 'y')
+        # The obs/mdl climatology variables are 2-d/3-d arrays,
+        # e.g., oClim = ma.zeros((ngrdY, ngrdX)), mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+        #----------------------------------------------------------------------------------------------------
+        if anlDomain == 'y':
+            # First determine the temporal properties to be evaluated
+            print ''
+            timeOption = misc.select_timOpt()
+            print ''
+            if timeOption == 1:
+                timeScale = 'annual'
+                # Compute the annual-mean time series and climatology
+                oTser, oClim = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
+                mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+                for n in np.arange(numMdl):
+                    id = mdlID[n]
+                    mTser[n, :, :, :], mClim[n, :, :] = calc_clim_year(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
+            elif timeOption == 2:
+                timeScale = 'seasonal'
+                # Select the timeseries and climatology for a season specified by the user
+                mTser = ma.zeros((numMdl, nYR, ngrdY, ngrdX))
+                mClim = ma.zeros((numMdl, ngrdY, ngrdX))
+                print ' '
+                moBgn = int(raw_input('Enter the beginning month for your season. 1-12: \n> '))
+                moEnd = int(raw_input('Enter the ending month for your season. 1-12: \n> '))
+                print ' '
+                if moEnd > moBgn:
+                    nMoPerSeason = moEnd - moBgn + 1
+                    oTser, oClim = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                    for n in np.arange(numMdl):
+                        id = mdlID[n]
+                        mTser[n, :, :, :], mClim[n, :, :] = calc_clim_season(nYR, nT, moBgn, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
+                elif moEnd == moBgn:
+                    # Evaluation for a single month: mTser, oTser are the annual time series
+                    # for the specified month (moEnd), and mClim, oClim are the corresponding climatologies
+                    oTser, oClim = calc_clim_One_month(moEnd, nYR, nT, allData[refID, :, :, :], Times)
+                    for n in np.arange(numMdl):
+                        id = mdlID[n]
+                        mTser[n, :, :, :], mClim[n, :, :] = calc_clim_One_month(moEnd, nYR, nT, allData[id, :, :, :], Times)
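+                # The branch below handles a season that wraps around the calendar year
+                # (moEnd < moBgn, e.g., DJF). Each seasonal mean is the month-count-weighted
+                # average of the partial season ending year i and the partial season
+                # starting year i+1, so the last year must be dropped.
+                # Worked example: moBgn = 12, moEnd = 2 gives nMoS1 = 1 (Dec) and
+                # nMoS2 = 2 (Jan-Feb), so
+                #     DJF(i) = (1 * Dec(i) + 2 * JanFeb(i + 1)) / 3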
+                elif moEnd < moBgn:
+                    # The season spans the year boundary, so the ending year is lost:
+                    # redefine nYR = nYR - 1 and drop YR[nYR]
+                    nMoS1 = 12 - moBgn + 1
+                    nMoS2 = moEnd
+                    nMoPerSeason = nMoS1 + nMoS2
+                    mTser = ma.zeros((numMdl, nYR - 1, ngrdY, ngrdX))
+                    # Calculate the seasonal timeseries and climatology for the model data
+                    for n in np.arange(numMdl):
+                        id = mdlID[n]
+                        mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[id, :, :, :], Times)
+                        mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[id, :, :, :], Times)
+                        for i in np.arange(nYR - 1):
+                            mTser[n, i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
+                    mClim = ma.average(mTser, axis = 1)
+                    # Repeat for the obs data
+                    mTser1, mClim1 = calc_clim_season(nYR, nT, moBgn, 12, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                    mTser2, mClim2 = calc_clim_season(nYR, nT, 1, moEnd, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                    oTser = ma.zeros((nYR - 1, ngrdY, ngrdX))
+                    for i in np.arange(nYR - 1):
+                        oTser[i, :, :] = (real(nMoS1) * mTser1[i, :, :] + real(nMoS2) * mTser2[i + 1, :, :]) / nMoPerSeason
+                    oClim = ma.average(oTser, axis = 0)
+                    nYR = nYR - 1
+                    yy = ma.empty(nYR)
+                    for i in np.arange(nYR):
+                        yy[i] = YR[i]
+                    # Release the memory allocated by the temporary variables
+                    mTser1 = 0.
+                    mTser2 = 0.
+                    mClim1 = 0.
+                    mClim2 = 0.
+            elif timeOption == 3:
+                timeScale = 'monthly'
+                # Compute the monthly-mean time series and climatology.
+                # Note the shapes of the output variables:
+                #   mTser = ma.zeros((numMdl, nYR, 12, ngrdY, ngrdX)), mClim = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                #   oTser = ma.zeros((nYR, 12, ngrdY, ngrdX)), oClim = ma.zeros((12, ngrdY, ngrdX))
+                oTser, oClim = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[refID, :, :, :], Times)
+                mTser = ma.zeros((numMdl, nYR, 12, ngrdY, ngrdX))
+                mClim = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                for n in np.arange(numMdl):
+                    id = mdlID[n]
+                    mTser[n, :, :, :, :], mClim[n, :, :, :] = calc_clim_mo(nYR, nT, ngrdY, ngrdX, allData[id, :, :, :], Times)
+            else:
+                # Undefined temporal-scale option: exit
+                print 'The desired temporal scale is not available this time. END the job'
+                sys.exit()
+
+            #--------------------------------
+            # (mp.006) Select the metric to be calculated
+            # bias, mae, acct, accs, pcct, pccs, rmst, rmss, pdfSkillScore, Taylor diagram
+            #----------------------------------------------------------------------------------------------------
+            print ' '
+            metricOption = misc.select_metrics()
+            print ' '
+
+            # Metric calculation: the shape of metricDat varies with the metric type & timescale options
+
+            # The metrics below yield a 2-d (annual/seasonal) or 3-d (monthly) array for each model
+            if metricOption == 'BIAS':
+                if timeScale == 'monthly':
+                    oStdv = np.zeros(12)
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :] = calc_bias(mTser[n, :, m, :, :], oTser[:, m, :, :])
+                            if n == 0:
+                                oStdv[m] = calc_temporal_stdv(oTser[:, m, :, :])
+                else:
+                    oStdv = calc_temporal_stdv(oTser)
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :] = calc_bias(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'MAE':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :] = calc_mae(mTser[n, :, m, :, :], oTser[:, m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :] = calc_mae(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'ACCt':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :] = calc_temporal_anom_cor(mTser[n, :, m, :, :], oTser[:, m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :] = calc_temporal_anom_cor(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'PCCt':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :] = calc_temporal_pat_cor(mTser[n, :, m, :, :], oTser[:, m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :] = calc_temporal_pat_cor(mTser[n, :, :, :], oTser)
+
+            elif metricOption == 'RMSt':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m, :, :] = calc_rms(mTser[n, :, m, :, :], oTser[:, m, :, :])
+                else:
+                    metricDat = ma.zeros((numMdl, ngrdY, ngrdX))
+                    for n in np.arange(numMdl):
+                        metricDat[n, :, :] = calc_rms(mTser[n, :, :, :], oTser)
+
+            # The metrics below yield a scalar (per month for the monthly timescale) for each model
+            elif metricOption == 'ACCs':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m] = calc_spatial_anom_cor(mClim[n, m, :, :], oClim[m, :, :])
+                else:
+                    metricDat = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        metricDat[n] = calc_spatial_anom_cor(mClim[n, :, :], oClim)
+
+            elif metricOption == 'PCCs':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m] = calc_spatial_pat_cor(mClim[n, m, :, :], oClim[m, :, :])
+                else:
+                    metricDat = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        metricDat[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim)
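+
+            # Illustrative sketch (assumed behavior, not the toolkit implementation):
+            # for two 2-d climatology fields, domain-wide scalar metrics of the kind
+            # computed above and below are of the form
+            #     rmsDiff = np.sqrt(ma.average((mClim[n] - oClim) ** 2))          # cf. rms_dom
+            #     patCor = mstats.pearsonr(mClim[n].ravel(), oClim.ravel())[0]    # cf. calc_spatial_pat_cor
+            # (the actual helpers may additionally apply masking and area weighting)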
+
+            elif metricOption == 'RMSs':
+                if timeScale == 'monthly':
+                    metricDat = ma.zeros((numMdl, 12))
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            metricDat[n, m] = rms_dom(mClim[n, m, :, :], oClim[m, :, :])
+                else:
+                    metricDat = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        metricDat[n] = rms_dom(mClim[n, :, :], oClim)
+
+            # Metrics for plotting a Taylor diagram
+            elif metricOption == 'Taylor_space':
+                if timeScale == 'monthly':
+                    oStdv = ma.zeros(12)
+                    mStdv = ma.zeros((numMdl, 12))
+                    mCorr = ma.zeros((numMdl, 12))
+                    mTemp = ma.zeros((ngrdY, ngrdX))
+                    for m in np.arange(12):
+                        oStdv[m] = oClim[m, :, :].std()
+                    for n in np.arange(numMdl):
+                        for m in np.arange(12):
+                            mTemp = mClim[n, m, :, :]
+                            mStdv[n, m] = mTemp.std()
+                            mCorr[n, m] = calc_spatial_pat_cor(mTemp, oClim[m, :, :])
+                else:
+                    oStdv = oClim.std()
+                    mStdv = ma.zeros(numMdl)
+                    mCorr = ma.zeros(numMdl)
+                    for n in np.arange(numMdl):
+                        mStdv[n] = mClim[n, :, :].std()
+                        mCorr[n] = calc_spatial_pat_cor(mClim[n, :, :], oClim)
+                # Normalize by the obs standard deviation for the Taylor diagram
+                mStdv = mStdv / oStdv    # standardized deviation
+
+            #--------------------------------
+            # (mp.007) Plot the metrics. First, enter the plot info
+            #----------------------------------------------------------------------------------------------------
+
+            # Taylor diagram [... 312 lines stripped ...]