From: lewismc@apache.org
To: commits@climate.apache.org
Date: Thu, 16 Oct 2014 17:17:39 -0000
Message-Id: <05d72565e36f4efb98bfdcaa8478c555@git.apache.org>
Subject: [43/50] [abbrv] git commit: Fix formatting, remove unused imports, update files.py and process.py to most recent OCW code

Fix formatting, remove unused imports, update files.py and process.py to most recent OCW code

Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/dc091596
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/dc091596
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/dc091596

Branch: refs/heads/master
Commit: dc091596038698eb15845a254ae0e76d52cf64ed
Parents: f9ae9c2
Author: Lewis John McGibbney
Authored: Mon Aug 25 16:45:06 2014 -0700
Committer: Lewis John McGibbney
Committed: Wed Oct 15 15:18:05 2014 -0700

----------------------------------------------------------------------
 mccsearch/code/__init__.py         |    0
 mccsearch/code/files.py            |  375 ++++++----
 mccsearch/code/mainProg.py         |   19 +-
 mccsearch/code/mainProgTemplate.py |   16 +
 mccsearch/code/mccSearch.py        |  449 ++++++------
 mccsearch/code/mccSearchUI.py      |   16 +
 mccsearch/code/process.py          | 1221 ++++++++++++-------
 mccsearch/docs/mccsearch.md        |   18 +-
 8 files changed, 992 insertions(+), 1122 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/__init__.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/__init__.py b/mccsearch/code/__init__.py
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/files.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/files.py b/mccsearch/code/files.py
index 82b508a..b238754 100644
--- a/mccsearch/code/files.py
+++ b/mccsearch/code/files.py
@@ -1,25 +1,36 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
 """
-Module for handling data input files. Requires PyNIO and Numpy be
+Module for handling data input files. Requires netCDF and Numpy be
 installed.

-This module can easily open NetCDF, HDF and Grib files. Search the PyNIO
+This module can easily open NetCDF, HDF and Grib files. Search the netCDF4
 documentation for a complete list of supported formats.
 """

 from os import path
-
-try:
-    import Nio
-except ImportError:
-    import nio as Nio
-
+import netCDF4
 import numpy as np
 import numpy.ma as ma
 import sys
-import process #KDW added
-#from toolkit import process
-#from utils import fortranfile
-#from utils import misc
+
+from toolkit import process
+from utils import fortranfile
+from utils import misc


 VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'],

@@ -48,15 +59,14 @@ def getVariableByType(filename, variableType):
         name match cannot be found.
     """
     try:
-        f = Nio.open_file(filename)
+        f = netCDF4.Dataset(filename, mode='r')
     except:
-        #print 'PyNio had an issue opening the filename (%s) you provided' % filename
-        print "NIOError:", sys.exc_info()[0]
+        print "netCDF4Error:", sys.exc_info()[0]
         raise

     variableKeys = f.variables.keys()
     f.close()
-    variableKeys = [variable.lower() for variable in variableKeys]
+    variableKeys = [variable.encode().lower() for variable in variableKeys]

     variableMatch = VARIABLE_NAMES[variableType]

     commonVariables = list(set(variableKeys).intersection(variableMatch))
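The hunk above captures the PyNIO-to-netCDF4 migration pattern this commit applies throughout files.py: Nio.open_file() becomes netCDF4.Dataset(), and variable names (which netCDF4 returns as unicode under Python 2) are byte-encoded before being lowercased. A minimal sketch of the mapping, assuming a local NetCDF file named sample.nc (the filename is illustrative):

    import netCDF4

    f = netCDF4.Dataset('sample.nc', mode='r')   # was: f = Nio.open_file('sample.nc')
    # netCDF4 hands back unicode variable names under Python 2, hence .encode()
    variableKeys = [v.encode().lower() for v in f.variables.keys()]
    print "variables found:", variableKeys
    f.close()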
@@ -80,10 +90,9 @@ def getVariableRange(filename, variableName):
         variableRange - tuple of order (variableMin, variableMax)
     """
     try:
-        f = Nio.open_file(filename)
+        f = netCDF4.Dataset(filename, mode='r')
     except:
-        #print 'PyNio had an issue opening the filename (%s) you provided' % filename
-        print "NIOError:", sys.exc_info()[0]
+        print "netCDF4Error:", sys.exc_info()[0]
         raise

     varArray = f.variables[variableName][:]
@@ -121,13 +130,9 @@ def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarNam
     #  ii) find out how many timesteps in the file
     #      (assume same ntimes in each file in list)
     #     -allows you to create an empty array to store variable data for all times
-    tmp = Nio.open_file(filename)
+    tmp = netCDF4.Dataset(filename, mode='r')
     latsraw = tmp.variables[latVarName][:]
     lonsraw = tmp.variables[lonVarName][:]
-    lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360. # convert to -180,180 if necessary
-
-    """TODO: Guard against case where latsraw and lonsraw are not the same dim?"""
-
     if(latsraw.ndim == 1):
         lon, lat = np.meshgrid(lonsraw, latsraw)
     if(latsraw.ndim == 2):
@@ -137,7 +142,7 @@ def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarNam
     timesraw = tmp.variables[timeVarName]
     ntimes = len(timesraw)
-    print 'Lats and lons read in for first file in filelist', ntimes
+    print 'Lats and lons read in for first file in filelist'

     # Create a single empty masked array to store model data from all files
     t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :])))
@@ -151,16 +156,16 @@ def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarNam
     #     as no assumption made that same number of times in each file...
-    for ifile in filelist:
+    for i, ifile in enumerate(filelist):
         #print 'Loading data from file: ',filelist[i]
-        f = Nio.open_file(ifile)
-        t2raw = f.variables[myvar][:]
+        f = netCDF4.Dataset(ifile, mode='r')
+        t2raw = ma.array(f.variables[myvar][:])
         timesraw = f.variables[timeVarName]
         time = timesraw[:]
         ntimes = len(time)
         print 'file= ', i, 'ntimes= ', ntimes, filelist[i]
-        print '\nt2raw shape: ', t2raw.shape
+        print 't2raw shape: ', t2raw.shape

         # Flatten dimensions which needn't exist, i.e. level
         # e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do.
@@ -175,32 +180,26 @@ def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarNam
         if t2tmp.ndim == 2:
             t2tmp = np.expand_dims(t2tmp, 0)

-        print "\n timesaccu is: ", timesaccu, time,timesaccu + np.arange(ntimes)
-
         t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :]
         timestore[timesaccu + np.arange(ntimes)] = time
-        timesaccu = timesaccu + ntimes
-        print "\n ***timesaccu is: ", timesaccu
+        timesaccu += ntimes
         f.close()
-        i += 1
-
-    print '\nData read in successfully with dimensions: ', t2store.shape, timesaccu
+
+    print 'Data read in successfully with dimensions: ', t2store.shape

     # TODO: search for duplicated entries (same time) and remove duplicates.
     # Check to see if number of unique times == number of times, if so then no problem
     if(len(np.unique(timestore)) != len(np.where(timestore != 0)[0].view())):
-        print '\nWARNING: Possible duplicated times'
+        print 'WARNING: Possible duplicated times'

     # Decode model times into python datetime objects. Note: timestore becomes a list (no longer an array) here
     timestore, _ = process.getModelTimes(filename, timeVarName)

-    data_dict = {}
-    data_dict['lats'] = lat
-    data_dict['lons'] = lon
-    data_dict['times'] = timestore
-    data_dict['data'] = t2store
-    #return lat, lon, timestore, t2store
+    # Make sure latlon grid is monotonically increasing and that the domains
+    # are correct
+    lat, lon, t2store = checkLatLon(lat, lon, t2store)
+    data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store}
     return data_dict
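The new checkLatLon() call (added further down in this file) normalizes grid orientation before the dictionary is returned: descending axes are reversed, and [0, 360) longitudes are shifted into [-180, 180). A minimal sketch of the reversal logic on synthetic arrays (variable names here are illustrative, not part of the patch):

    import numpy as np

    lats = np.array([30., 20., 10.])      # stored north-to-south in some files
    data = np.arange(9.).reshape(3, 3)    # shape (lat, lon)

    if (np.diff(lats) < 0).all():         # the same per-axis test checkLatLon applies
        lats = lats[::-1]                 # make latitudes ascend...
        data = data[..., ::-1, :]         # ...and mirror the lat axis of the data to match
    print lats                            # [ 10.  20.  30.]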
 def select_var_from_file(myfile, fmt='not set'):
@@ -209,7 +208,7 @@
     Input:
        myfile - filename
-       fmt - (optional) specify fileformat for PyNIO if filename suffix is non-standard
+       fmt - (optional) specify fileformat for netCDF4 if filename suffix is non-standard

     Output:
        myvar - variable name in file
@@ -220,12 +219,12 @@
     print fmt

     if fmt == 'not set':
-        f = Nio.open_file(myfile)
+        f = netCDF4.Dataset(myfile, mode='r')

     if fmt != 'not set':
-        f = Nio.open_file(myfile, format=fmt)
+        f = netCDF4.Dataset(myfile, mode='r', format=fmt)

-    keylist = f.variables.keys()
+    keylist = [key.encode().lower() for key in f.variables.keys()]

     i = 0
     for v in keylist:
@@ -251,9 +250,8 @@
        Peter Lean September 2010
     '''
-    f = Nio.open_file(myfile, format='nc')
-
-    keylist = f.variables.keys()
+    f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4')
+    keylist = [key.encode().lower() for key in f.variables.keys()]

     i = 0
     for v in keylist:
@@ -270,68 +268,50 @@
     return mywrfvar

-def read_lolaT_from_file(filename, latVarName, lonVarName, timeVarName, file_type):
+def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, file_type):
     """
-    Function that will return lat, lon, and time arrays
+    Purpose::
+        Read in data from one file at a time

-    Input::
-        filename - the file to inspect
-        latVarName - name of the Latitude Variable
+    Input::
+        filelist - list of filenames (including path)
+        myvar - string containing name of variable to load in (as it appears in file)
         lonVarName - name of the Longitude Variable
         timeVarName - name of the Time Variable
-        fileType = type of file we are trying to parse
-
-    Output::
-        lat - MESH GRID of Latitude values with shape (nx, ny)
-        lon - MESH GRID of Longitude values with shape (nx, ny)
-        timestore - Python list of Datetime objects
-
-        MESHGRID docs: http://docs.scipy.org/doc/numpy/reference/generated/numpy.meshgrid.html
+        fileType - type of file we are trying to parse

-    """
-
-    tmp = Nio.open_file(filename, format=file_type)
-    lonsraw = tmp.variables[lonVarName][:]
-    latsraw = tmp.variables[latVarName][:]
-    lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.
# convert to -180,180 if necessary - if(latsraw.ndim == 1): - lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw - lat = latsraw - timestore, _ = process.getModelTimes(filename, timeVarName) - print ' read_lolaT_from_file: Lats, lons and times read in for the model domain' - return lat, lon, timestore - -def read_data_from_one_file(ifile, myvar, timeVarName, lat, file_type): - ################################################################################## - # Read in data from one file at a time - # Input: filelist - list of filenames (including path) - # myvar - string containing name of variable to load in (as it appears in file) - # Output: lat, lon - 2D array of latitude and longitude values - # times - list of times - # t2store - numpy array containing data from all files - # Modified from read_data_from_file_list to read data from multiple models into a 4-D array - # 1. The code now processes model data that completely covers the 20-yr period. Thus, - # all model data must have the same time levels (ntimes). Unlike in the oroginal, ntimes - # is fixed here. - # 2. Because one of the model data exceeds 240 mos (243 mos), the model data must be - # truncated to the 240 mons using the ntimes determined from the first file. - ################################################################################## - f = Nio.open_file(ifile) + Output:: + lat, lon - 2D arrays of latitude and longitude values + times - list of times + t2store - numpy array containing data from the file for the requested variable + varUnit - units for the variable given by t2store + """ + f = netCDF4.Dataset(ifile, mode='r') try: - varUnit = f.variables[myvar].units.upper() + varUnit = f.variables[myvar].units.encode().upper() except: varUnit = raw_input('Enter the model variable unit: \n> ').upper() - t2raw = f.variables[myvar][:] + t2raw = ma.array(f.variables[myvar][:]) t2tmp = t2raw.squeeze() if t2tmp.ndim == 2: t2tmp = np.expand_dims(t2tmp, 0) - t2tmp = t2tmp + + lonsraw = f.variables[lonVarName][:] + latsraw = f.variables[latVarName][:] + if(latsraw.ndim == 1): + lon, lat = np.meshgrid(lonsraw, latsraw) + if(latsraw.ndim == 2): + lon = lonsraw + lat = latsraw + f.close() print ' success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit timestore = process.decode_model_timesK(ifile, timeVarName, file_type) - return timestore, t2tmp, varUnit + + # Make sure latlon grid is monotonically increasing and that the domains + # are correct + lat, lon, t2store = checkLatLon(lat, lon, t2tmp) + return lat, lon, timestore, t2store, varUnit def findTimeVariable(filename): """ @@ -343,12 +323,12 @@ def findTimeVariable(filename): variableNameList - list of variable names from the input filename """ try: - f = Nio.open_file(filename, mode='r') + f = netCDF4.Dataset(filename, mode='r') except: print("Unable to open '%s' to try and read the Time variable" % filename) raise - variableNameList = f.variables.keys() + variableNameList = [variable.encode() for variable in f.variables.keys()] # convert all variable names into lower case varNameListLowerCase = [x.lower() for x in variableNameList] @@ -383,12 +363,12 @@ def findLatLonVarFromFile(filename): -lonMax """ try: - f = Nio.open_file(filename, mode='r') + f = netCDF4.Dataset(filename, mode='r') except: print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename) raise - variableNameList = f.variables.keys() + variableNameList = [variable.encode() for variable in 
f.variables.keys()] # convert all variable names into lower case varNameListLowerCase = [x.lower() for x in variableNameList] @@ -449,16 +429,17 @@ def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarN # i) read in lats, lons # ii) find out how many timesteps in the file (assume same ntimes in each file in list) # -allows you to create an empty array to store variable data for all times - tmp = Nio.open_file(filelist[0], format=file_type) + tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type) latsraw = tmp.variables[latVarName][:] lonsraw = tmp.variables[lonVarName][:] - lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360. # convert to -180,180 if necessary + timesraw = tmp.variables[timeVarName] + if(latsraw.ndim == 1): lon, lat = np.meshgrid(lonsraw, latsraw) - if(latsraw.ndim == 2): - lon = lonsraw; lat = latsraw - - timesraw = tmp.variables[timeVarName] + + elif(latsraw.ndim == 2): + lon = lonsraw + lat = latsraw ntimes = len(timesraw); nygrd = len(lat[:, 0]); nxgrd = len(lon[0, :]) print 'Lats and lons read in for first file in filelist' @@ -474,16 +455,13 @@ def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarN # NB. this method allows for missing times in data files # as no assumption made that same number of times in each file... - i = 0 - for ifile in filelist: + for i, ifile in enumerate(filelist): #print 'Loading data from file: ',filelist[i] - f = Nio.open_file(ifile) - t2raw = f.variables[myvar][:] + f = netCDF4.Dataset(ifile, mode='r') + t2raw = ma.array(f.variables[myvar][:]) timesraw = f.variables[timeVarName] - #time = timesraw[0:ntimes] - ntimes = len(timesraw) #ntimes=len(time) - print 'file= ',i,' ntimes= ',ntimes,filelist[i] + #print 'file= ',i,'ntimes= ',ntimes,filelist[i] ## Flatten dimensions which needn't exist, i.e. level ## e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do. ## Code requires data to have dimensions, (time,lat,lon) @@ -493,18 +471,20 @@ def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarN if t2tmp.ndim == 2: t2tmp = np.expand_dims(t2tmp, 0) #t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:] - t2store[i, 0:(ntimes-1), :, :] = t2tmp[0:(ntimes-1), :, :] + t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :] #timestore[timesaccu+np.arange(ntimes)]=time #timesaccu=timesaccu+ntimes f.close() - i += 1 print 'Data read in successfully with dimensions: ', t2store.shape # Decode model times into python datetime objects. 
Note: timestore becomes a list (no more an array) here ifile = filelist[0] timestore, _ = process.getModelTimes(ifile, timeVarName) - + + # Make sure latlon grid is monotonically increasing and that the domains + # are correct + lat, lon, t2store = checkLatLon(lat, lon, t2store) return lat, lon, timestore, t2store def find_latlon_ranges(filelist, lat_var_name, lon_var_name): @@ -524,7 +504,7 @@ def find_latlon_ranges(filelist, lat_var_name, lon_var_name): filename = filelist[0] try: - f = Nio.open_file(filename) + f = netCDF4.Dataset(filename, mode='r') lats = f.variables[lat_var_name][:] latMin = lats.min() @@ -622,30 +602,30 @@ def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, md dimY = mdlData.shape[2] # y-dimension dimX = mdlData.shape[3] # x-dimension dimR = obsRgnAvg.shape[1] # the number of subregions - f = Nio.open_file(fileName, mode='w', format='nc') + f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4') print mdlRgnAvg.shape, dimM, dimR, dimT #create global attributes - f.globalAttName = '' + f.description = '' # create dimensions print 'Creating Dimensions within the NetCDF Object...' - f.create_dimension('unity', 1) - f.create_dimension('time', dimT) - f.create_dimension('west_east', dimX) - f.create_dimension('south_north', dimY) - f.create_dimension('obs', dimO) - f.create_dimension('models', dimM) + f.createDimension('unity', 1) + f.createDimension('time', dimT) + f.createDimension('west_east', dimX) + f.createDimension('south_north', dimY) + f.createDimension('obs', dimO) + f.createDimension('models', dimM) # create the variable (real*4) to be written in the file print 'Creating Variables...' - f.create_variable('lon', 'd', ('south_north', 'west_east')) - f.create_variable('lat', 'd', ('south_north', 'west_east')) - f.create_variable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east')) - f.create_variable('mDat', 'd', ('models', 'time', 'south_north', 'west_east')) + f.createVariable('lon', 'd', ('south_north', 'west_east')) + f.createVariable('lat', 'd', ('south_north', 'west_east')) + f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east')) + f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 'west_east')) if subRegions: - f.create_dimension('regions', dimR) - f.create_variable('oRgn', 'd', ('obs', 'regions', 'time')) - f.create_variable('mRgn', 'd', ('models', 'regions', 'time')) + f.createDimension('regions', dimR) + f.createVariable('oRgn', 'd', ('obs', 'regions', 'time')) + f.createVariable('mRgn', 'd', ('models', 'regions', 'time')) f.variables['oRgn'].varAttName = 'Observation time series: Subregions' f.variables['mRgn'].varAttName = 'Model time series: Subregions' @@ -664,33 +644,140 @@ def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, md f.variables['mDat'].varAttName = 'Model time series: entire domain' # assign the values to the variable and write it - f.variables['lon'][:, :] = lons - f.variables['lat'][:, :] = lats + f.variables['lon'][:] = lons[:] + f.variables['lat'][:] = lats[:] if subRegions: - f.variables['oRgn'][:, :, :] = obsRgnAvg - f.variables['mRgn'][:, :, :] = mdlRgnAvg + f.variables['oRgn'][:] = obsRgnAvg[:] + f.variables['mRgn'][:] = mdlRgnAvg[:] f.close() def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType): """ Input:: - fileObject - PyNIO file object data will be loaded into + fileObject - netCDF4 file object data will be loaded into datasets - List of dataset names dataArray - Multi-dimensional array of data to be loaded into the 
                    NetCDF file
        dataType - String with value of either 'Model' or 'Observation'

    Output::
-       No return value. PyNIO file object is updated in place
+       No return value. netCDF4 file object is updated in place
     """
     datasetCount = 0
-    for dataset in datasets:
+    for datasetCount, dataset in enumerate(datasets):
         if dataType.lower() == 'observation':
             datasetName = dataset.replace(' ','')
         elif dataType.lower() == 'model':
             datasetName = path.splitext(path.basename(dataset))[0]
         print "Creating variable %s" % datasetName
-        fileObject.create_variable(datasetName, 'd', ('time', 'south_north', 'west_east'))
+        fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
         fileObject.variables[datasetName].varAttName = 'Observation time series: entire domain'

         print 'Loading values into %s' % datasetName
-        fileObject.variables[datasetName].assign_value(dataArray[datasetCount,:,:,:])
-        datasetCount += 1
+        fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:]
+
+def checkLatLon(latsin, lonsin, datain):
+    """
+    Purpose::
+        Checks whether latitudes and longitudes are monotonically increasing
+        within the domains [-90, 90) and [-180, 180) respectively, and rearranges the input data
+        accordingly if they are not.
+
+    Input::
+        latsin - Array of latitudes read from a raw netcdf file
+        lonsin - Array of longitudes read from a raw netcdf file
+        datain - Array of data values read from a raw netcdf file.
+                 The shape is assumed to be (..., nLat, nLon).
+
+    Output::
+        latsout - 2D array of (rearranged) latitudes
+        lonsout - 2D array of (rearranged) longitudes
+        dataout - Array of (rearranged) data
+    """
+    # Avoid unnecessary shifting if all lons are higher than 180
+    if lonsin.min() > 180:
+        lonsin -= 360
+
+    # Make sure lats and lons are monotonically increasing
+    latsDecreasing = np.diff(latsin[:, 0]) < 0
+    lonsDecreasing = np.diff(lonsin[0]) < 0
+
+    # If all values are decreasing then they just need to be reversed
+    latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all()
+
+    # If the lat values are unsorted then raise an exception
+    if not latsReversed and latsDecreasing.any():
+        raise ValueError('Latitudes must be monotonically increasing.')
+
+    # Perform same checks now for lons
+    if not lonsReversed and lonsDecreasing.any():
+        raise ValueError('Longitudes must be monotonically increasing.')
+
+    # Also check if lons go from [0, 360), and convert to [-180, 180)
+    # if necessary
+    lonsShifted = lonsin.max() > 180
+    latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:]
+    # Now correct data if latlon grid needs to be shifted
+    if latsReversed:
+        latsout = latsout[::-1]
+        dataout = dataout[..., ::-1, :]
+
+    if lonsReversed:
+        lonsout = lonsout[..., ::-1]
+        dataout = dataout[..., ::-1]
+
+    if lonsShifted:
+        lat1d = latsout[:, 0]
+        dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False)
+        lonsout, latsout = np.meshgrid(lon1d, lat1d)
+
+    return latsout, lonsout, dataout
+
+def shiftgrid(lon0, datain, lonsin, start= True, cyclic=360.0):
+    """
+    Purpose::
+        Shift global lat/lon grid east or west. This function is taken directly
+        from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not
+        currently support arrays with more than two dimensions.
+        https://github.com/matplotlib/basemap
+
+    Input::
+        lon0 - starting longitude for shifted grid (ending longitude if start=False).
+               lon0 must be on input grid (within the range of lonsin).
+        datain - original data with longitude the right-most dimension.
+        lonsin - original longitudes.
+        start - if True, lon0 represents the starting longitude of the new grid.
+                if False, lon0 is the ending longitude. Default True.
+        cyclic - width of periodic domain (default 360)
+
+    Output::
+        dataout - data on shifted grid
+        lonsout - lons on shifted grid
+    """
+    if np.fabs(lonsin[-1]-lonsin[0]-cyclic) > 1.e-4:
+        # Use all data instead of raise ValueError, 'cyclic point not included'
+        start_idx = 0
+    else:
+        # If cyclic, remove the duplicate point
+        start_idx = 1
+    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
+        raise ValueError('lon0 outside of range of lonsin')
+    i0 = np.argmin(np.fabs(lonsin-lon0))
+    i0_shift = len(lonsin)-i0
+    if ma.isMA(datain):
+        dataout = ma.zeros(datain.shape,datain.dtype)
+    else:
+        dataout = np.zeros(datain.shape,datain.dtype)
+    if ma.isMA(lonsin):
+        lonsout = ma.zeros(lonsin.shape,lonsin.dtype)
+    else:
+        lonsout = np.zeros(lonsin.shape,lonsin.dtype)
+    if start:
+        lonsout[0:i0_shift] = lonsin[i0:]
+    else:
+        lonsout[0:i0_shift] = lonsin[i0:]-cyclic
+    dataout[...,0:i0_shift] = datain[...,i0:]
+    if start:
+        lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]+cyclic
+    else:
+        lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]
+    dataout[...,i0_shift:] = datain[...,start_idx:i0+start_idx]
+    return dataout,lonsout
\ No newline at end of file
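shiftgrid() is the piece checkLatLon() leans on to rotate a [0, 360) grid so that it ends at 180. A quick synthetic check of the start=False path used above, with files.shiftgrid as added in this patch (array values are illustrative only):

    import numpy as np
    from files import shiftgrid   # the function added by this commit

    lons = np.arange(0., 360., 90.)        # [0, 90, 180, 270]
    data = np.array([[0., 1., 2., 3.]])    # one latitude row, longitude right-most

    dataout, lonsout = shiftgrid(180., data, lons, start=False)
    print lonsout                          # [-180.  -90.    0.   90.]
    print dataout                          # [[ 2.  3.  0.  1.]]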
http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/mainProg.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/mainProg.py b/mccsearch/code/mainProg.py
index 43ca921..bb032fc 100644
--- a/mccsearch/code/mainProg.py
+++ b/mccsearch/code/mainProg.py
@@ -1,10 +1,27 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
 '''
 # running the program
 '''
 import sys
 import networkx as nx
-import mccSearch
+from mccSearch import
 import numpy as np
 import numpy.ma as ma
 import files

http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/mainProgTemplate.py
----------------------------------------------------------------------
diff --git a/mccsearch/code/mainProgTemplate.py b/mccsearch/code/mainProgTemplate.py
index 9be65b5..d43788e 100644
--- a/mccsearch/code/mainProgTemplate.py
+++ b/mccsearch/code/mainProgTemplate.py
@@ -1,3 +1,19 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ''' # running the program ''' http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/mccSearch.py ---------------------------------------------------------------------- diff --git a/mccsearch/code/mccSearch.py b/mccsearch/code/mccSearch.py index b7dc4eb..e691ed5 100644 --- a/mccsearch/code/mccSearch.py +++ b/mccsearch/code/mccSearch.py @@ -1,18 +1,28 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ''' # All the functions for the MCC search algorithm # Following RCMES dataformat in format (t,lat,lon), value ''' -import datetime from datetime import timedelta, datetime -import calendar -import fileinput import glob import itertools -import json -import math -import Nio -from netCDF4 import Dataset, num2date, date2num +from netCDF4 import Dataset, date2num import numpy as np import numpy.ma as ma import os @@ -25,10 +35,10 @@ import sys import time import networkx as nx -import matplotlib.pyplot as plt import matplotlib.dates as mdates from matplotlib.dates import DateFormatter,HourLocator -from matplotlib import cm + +import matplotlib.pyplot as plt import matplotlib.cm as cm import matplotlib.colors as mcolors from matplotlib.ticker import FuncFormatter, FormatStrFormatter @@ -48,14 +58,14 @@ XRES = 4.0 #x direction spatial resolution in km YRES = 4.0 #y direction spatial resolution in km TRES = 1 #temporal resolution in hrs LAT_DISTANCE = 111.0 #the avg distance in km for 1deg lat for the region being considered -LON_DISTANCE = 111.0 #the avg distance in km for 1deg lon for the region being considered +LON_DISTANCE = 111.0 #the avg distance in km for 1deg lon for the region being considered STRUCTURING_ELEMENT = [[0,1,0],[1,1,1],[0,1,0]] #the matrix for determining the pattern for the contiguous boxes and must - #have same rank of the matrix it is being compared against + #have same rank of the matrix it is being compared against #criteria for determining cloud elements and edges T_BB_MAX = 243 #warmest temp to allow (-30C to -55C according to Morel and Sensi 2002) T_BB_MIN = 218 #cooler temp for the center of the system CONVECTIVE_FRACTION = 0.90 #the min temp/max temp that would be expected in a CE.. 
this is highly conservative (only a 10K difference) -MIN_MCS_DURATION = 3 #minimum time for a MCS to exist +MIN_MCS_DURATION = 3 #minimum time for a MCS to exist AREA_MIN = 2400.0 #minimum area for CE criteria in km^2 according to Vila et al. (2008) is 2400 MIN_OVERLAP= 10000.00 #km^2 from Williams and Houze 1987, indir ref in Arnaud et al 1992 @@ -80,19 +90,19 @@ PRUNED_GRAPH = nx.DiGraph() def readMergData(dirname, filelist = None): ''' Purpose:: - Read MERG data into RCMES format + Read MERG data into RCMES format Input:: - dirname: a string representing the directory to the MERG files in NETCDF format - filelist (optional): a list of strings representing the filenames betweent the start and end dates provided + dirname: a string representing the directory to the MERG files in NETCDF format + filelist (optional): a list of strings representing the filenames betweent the start and end dates provided Output:: - A 3D masked array (t,lat,lon) with only the variables which meet the minimum temperature - criteria for each frame + A 3D masked array (t,lat,lon) with only the variables which meet the minimum temperature + criteria for each frame Assumptions:: - The MERG data has been converted to NETCDF using LATS4D - The data has the same lat/lon format + The MERG data has been converted to NETCDF using LATS4D + The data has the same lat/lon format TODO:: figure out how to use netCDF4 to do the clipping tmp = netCDF4.Dataset(filelist[0]) @@ -129,8 +139,9 @@ def readMergData(dirname, filelist = None): sys.exit() else: # Open the first file in the list to read in lats, lons and generate the grid for comparison - tmp = Nio.open_file(filelist[0], format='nc') - + #tmp = Nio.open_file(filelist[0], format='nc') + tmp = Dataset(filelist[0], format='NETCDF4') + #clip the lat/lon grid according to user input #http://www.pyngl.ucar.edu/NioExtendedSelection.shtml latsraw = tmp.variables[mergLatVarName][mergLatVarName+"|"+LATMIN+":"+LATMAX].astype('f2') @@ -146,11 +157,12 @@ def readMergData(dirname, filelist = None): for files in filelist: try: - thisFile = Nio.open_file(files, format='nc') + #thisFile = Nio.open_file(files, format='nc') + thisFile = Dataset(files, format='NETCDF4') #clip the dataset according to user lat,lon coordinates #mask the data and fill with zeros for later tempRaw = thisFile.variables[mergVarName][mergLatVarName+"|"+LATMIN+":"+LATMAX \ - +" " +mergLonVarName+"|"+LONMIN+":"+LONMAX ].astype('int16') + +" " +mergLonVarName+"|"+LONMIN+":"+LONMAX ].astype('int16') tempMask = ma.masked_array(tempRaw, mask=(tempRaw > T_BB_MAX), fill_value=0) @@ -180,8 +192,8 @@ def readMergData(dirname, filelist = None): def findCloudElements(mergImgs,timelist,TRMMdirName=None): ''' Purpose:: - Determines the contiguous boxes for a given time of the satellite images i.e. each frame - using scipy ndimage package + Determines the contiguous boxes for a given time of the satellite images i.e. each frame + using scipy ndimage package Input:: mergImgs: masked numpy array in (time,lat,lon),T_bb representing the satellite data. This is masked based on the @@ -190,8 +202,8 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): TRMMdirName (optional): string representing the path where to find the TRMM datafiles Output:: - CLOUD_ELEMENT_GRAPH: a Networkx directed graph where each node contains the information in cloudElementDict - The nodes are determined according to the area of contiguous squares. The nodes are linked through weighted edges. 
+ CLOUD_ELEMENT_GRAPH: a Networkx directed graph where each node contains the information in cloudElementDict + The nodes are determined according to the area of contiguous squares. The nodes are linked through weighted edges. cloudElementDict = {'uniqueID': unique tag for this CE, 'cloudElementTime': time of the CE, @@ -207,9 +219,9 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): 'CETRMMmax':floating-point representing the max rate in the CE if TRMMdirName entered, 'CETRMMmin':floating-point representing the min rate in the CE if TRMMdirName entered} Assumptions:: - Assumes we are dealing with MERG data which is 4kmx4km resolved, thus the smallest value - required according to Vila et al. (2008) is 2400km^2 - therefore, 2400/16 = 150 contiguous squares + Assumes we are dealing with MERG data which is 4kmx4km resolved, thus the smallest value + required according to Vila et al. (2008) is 2400km^2 + therefore, 2400/16 = 150 contiguous squares ''' frame = ma.empty((1,mergImgs.shape[1],mergImgs.shape[2])) @@ -293,7 +305,7 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): CEuniqueID = 'F'+str(frameNum)+'CE'+str(frameCEcounter) #------------------------------------------------- - #textfile name for accesing CE data using MATLAB code + #textfile name for accesing CE data using MATLAB code # thisFileName = MAINDIRECTORY+'/' + (str(timelist[t])).replace(" ", "_") + CEuniqueID +'.txt' # cloudElementsTextFile = open(thisFileName,'w') #------------------------------------------------- @@ -417,8 +429,8 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): #-----------End most of NETCDF file stuff ------------------------------------ #populate cloudElementLatLons by unpacking the original values from loc to get the actual value for lat and lon - #TODO: KDW - too dirty... play with itertools.izip or zip and the enumerate with this - # as cloudElement is masked + #TODO: KDW - too dirty... 
play with itertools.izip or zip and the enumerate with this + # as cloudElement is masked for index,value in np.ndenumerate(cloudElement): if value != 0 : lat_index,lon_index = index @@ -443,7 +455,7 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): #calculate the total precip associated with the feature for index, value in np.ndenumerate(finalCETRMMvalues): precipTotal += value - precip.append(value) + precip.append(value) rainFallacc[:] = finalCETRMMvalues[:] currNetCDFTRMMData.close() @@ -460,19 +472,19 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): #determine if the cloud element the shape cloudElementEpsilon = eccentricity (cloudElement) - cloudElementsUserFile.write("\n\nTime is: %s" %(str(timelist[t]))) - cloudElementsUserFile.write("\nCEuniqueID is: %s" %CEuniqueID) - latCenter, lonCenter = ndimage.measurements.center_of_mass(cloudElement, labels=labels) - - #latCenter and lonCenter are given according to the particular array defining this CE - #so you need to convert this value to the overall domain truth - latCenter = cloudElementLat[round(latCenter)] - lonCenter = cloudElementLon[round(lonCenter)] - cloudElementsUserFile.write("\nCenter (lat,lon) is: %.2f\t%.2f" %(latCenter, lonCenter)) - cloudElementCenter.append(latCenter) - cloudElementCenter.append(lonCenter) - cloudElementsUserFile.write("\nNumber of boxes are: %d" %numOfBoxes) - cloudElementsUserFile.write("\nArea is: %.4f km^2" %(cloudElementArea)) + cloudElementsUserFile.write("\n\nTime is: %s" %(str(timelist[t]))) + cloudElementsUserFile.write("\nCEuniqueID is: %s" %CEuniqueID) + latCenter, lonCenter = ndimage.measurements.center_of_mass(cloudElement, labels=labels) + + #latCenter and lonCenter are given according to the particular array defining this CE + #so you need to convert this value to the overall domain truth + latCenter = cloudElementLat[round(latCenter)] + lonCenter = cloudElementLon[round(lonCenter)] + cloudElementsUserFile.write("\nCenter (lat,lon) is: %.2f\t%.2f" %(latCenter, lonCenter)) + cloudElementCenter.append(latCenter) + cloudElementCenter.append(lonCenter) + cloudElementsUserFile.write("\nNumber of boxes are: %d" %numOfBoxes) + cloudElementsUserFile.write("\nArea is: %.4f km^2" %(cloudElementArea)) cloudElementsUserFile.write("\nAverage brightness temperature is: %.4f K" %ndimage.mean(cloudElement, labels=labels)) cloudElementsUserFile.write("\nMin brightness temperature is: %.4f K" %ndimage.minimum(cloudElement, labels=labels)) cloudElementsUserFile.write("\nMax brightness temperature is: %.4f K" %ndimage.maximum(cloudElement, labels=labels)) @@ -507,42 +519,42 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): elif areaOverlap >= MIN_OVERLAP: CLOUD_ELEMENT_GRAPH.add_edge(cloudElementDict['uniqueID'], CEuniqueID, weight=edgeWeight[2]) - else: - #TODO: remove this else as we only wish for the CE details - #ensure only the non-zero elements are considered - #store intel in allCE file - labels, _ = ndimage.label(cloudElement) - cloudElementsFile.write("\n-----------------------------------------------") - cloudElementsFile.write("\n\nTime is: %s" %(str(timelist[t]))) - # cloudElementLat = LAT[loc[0],0] - # cloudElementLon = LON[0,loc[1]] - - #populate cloudElementLatLons by unpacking the original values from loc - #TODO: KDW - too dirty... 
play with itertools.izip or zip and the enumerate with this - # as cloudElement is masked - for index,value in np.ndenumerate(cloudElement): - if value != 0 : - lat_index,lon_index = index - lat_lon_tuple = (cloudElementLat[lat_index], cloudElementLon[lon_index]) - cloudElementLatLons.append(lat_lon_tuple) - - cloudElementsFile.write("\nLocation of rejected CE (lat,lon) points are: %s" %cloudElementLatLons) - #latCenter and lonCenter are given according to the particular array defining this CE + else: + #TODO: remove this else as we only wish for the CE details + #ensure only the non-zero elements are considered + #store intel in allCE file + labels, _ = ndimage.label(cloudElement) + cloudElementsFile.write("\n-----------------------------------------------") + cloudElementsFile.write("\n\nTime is: %s" %(str(timelist[t]))) + # cloudElementLat = LAT[loc[0],0] + # cloudElementLon = LON[0,loc[1]] + + #populate cloudElementLatLons by unpacking the original values from loc + #TODO: KDW - too dirty... play with itertools.izip or zip and the enumerate with this + # as cloudElement is masked + for index,value in np.ndenumerate(cloudElement): + if value != 0 : + lat_index,lon_index = index + lat_lon_tuple = (cloudElementLat[lat_index], cloudElementLon[lon_index]) + cloudElementLatLons.append(lat_lon_tuple) + + cloudElementsFile.write("\nLocation of rejected CE (lat,lon) points are: %s" %cloudElementLatLons) + #latCenter and lonCenter are given according to the particular array defining this CE #so you need to convert this value to the overall domain truth - latCenter, lonCenter = ndimage.measurements.center_of_mass(cloudElement, labels=labels) - latCenter = cloudElementLat[round(latCenter)] - lonCenter = cloudElementLon[round(lonCenter)] - cloudElementsFile.write("\nCenter (lat,lon) is: %.2f\t%.2f" %(latCenter, lonCenter)) - cloudElementsFile.write("\nNumber of boxes are: %d" %numOfBoxes) - cloudElementsFile.write("\nArea is: %.4f km^2" %(cloudElementArea)) - cloudElementsFile.write("\nAverage brightness temperature is: %.4f K" %ndimage.mean(cloudElement, labels=labels)) - cloudElementsFile.write("\nMin brightness temperature is: %.4f K" %ndimage.minimum(cloudElement, labels=labels)) - cloudElementsFile.write("\nMax brightness temperature is: %.4f K" %ndimage.maximum(cloudElement, labels=labels)) - cloudElementsFile.write("\nBrightness temperature variance is: %.4f K" %ndimage.variance(cloudElement, labels=labels)) - cloudElementsFile.write("\nConvective fraction is: %.4f " %(((ndimage.minimum(cloudElement, labels=labels))/float((ndimage.maximum(cloudElement, labels=labels))))*100.0)) - cloudElementsFile.write("\nEccentricity is: %.4f " %(cloudElementEpsilon)) - cloudElementsFile.write("\n-----------------------------------------------") - + latCenter, lonCenter = ndimage.measurements.center_of_mass(cloudElement, labels=labels) + latCenter = cloudElementLat[round(latCenter)] + lonCenter = cloudElementLon[round(lonCenter)] + cloudElementsFile.write("\nCenter (lat,lon) is: %.2f\t%.2f" %(latCenter, lonCenter)) + cloudElementsFile.write("\nNumber of boxes are: %d" %numOfBoxes) + cloudElementsFile.write("\nArea is: %.4f km^2" %(cloudElementArea)) + cloudElementsFile.write("\nAverage brightness temperature is: %.4f K" %ndimage.mean(cloudElement, labels=labels)) + cloudElementsFile.write("\nMin brightness temperature is: %.4f K" %ndimage.minimum(cloudElement, labels=labels)) + cloudElementsFile.write("\nMax brightness temperature is: %.4f K" %ndimage.maximum(cloudElement, labels=labels)) + 
cloudElementsFile.write("\nBrightness temperature variance is: %.4f K" %ndimage.variance(cloudElement, labels=labels)) + cloudElementsFile.write("\nConvective fraction is: %.4f " %(((ndimage.minimum(cloudElement, labels=labels))/float((ndimage.maximum(cloudElement, labels=labels))))*100.0)) + cloudElementsFile.write("\nEccentricity is: %.4f " %(cloudElementEpsilon)) + cloudElementsFile.write("\n-----------------------------------------------") + #reset list for the next CE nodeExist = False cloudElementCenter=[] @@ -563,7 +575,7 @@ def findCloudElements(mergImgs,timelist,TRMMdirName=None): prevFrameCEs =[] prevFrameCEs = currFrameCEs currFrameCEs =[] - + cloudElementsFile.close cloudElementsUserFile.close #if using ARCGIS data store code, uncomment this file close line @@ -593,11 +605,11 @@ def findPrecipRate(TRMMdirName, timelist): TRMMdirName: a string representing the directory for the original TRMM netCDF files timelist: a list of python datatimes - Output:: a list of dictionary of the TRMM data - NB: also creates netCDF with TRMM data for each CE (for post processing) index - in MAINDIRECTORY/TRMMnetcdfCEs + Output:: a list of dictionary of the TRMM data + NB: also creates netCDF with TRMM data for each CE (for post processing) index + in MAINDIRECTORY/TRMMnetcdfCEs - Assumptions:: Assumes that findCloudElements was run without the TRMMdirName value + Assumptions:: Assumes that findCloudElements was run without the TRMMdirName value ''' allCEnodesTRMMdata =[] @@ -765,14 +777,14 @@ def findCloudClusters(CEGraph): ''' Purpose:: Determines the cloud clusters properties from the subgraphs in - the graph i.e. prunes the graph according to the minimum depth + the graph i.e. prunes the graph according to the minimum depth Input:: CEGraph: a Networkx directed graph of the CEs with weighted edges according the area overlap between nodes (CEs) of consectuive frames - - Output:: - PRUNED_GRAPH: a Networkx directed graph of with CCs/ MCSs + + Output:: + PRUNED_GRAPH: a Networkx directed graph of with CCs/ MCSs ''' @@ -828,11 +840,11 @@ def findMCC (prunedGraph): Input:: prunedGraph: a Networkx Graph representing the CCs - Output:: - finalMCCList: a list of list of tuples representing a MCC - - Assumptions: - frames are ordered and are equally distributed in time e.g. hrly satellite images + Output:: + finalMCCList: a list of list of tuples representing a MCC + + Assumptions: + frames are ordered and are equally distributed in time e.g. hrly satellite images ''' MCCList = [] @@ -999,12 +1011,12 @@ def traverseTree(subGraph,node, stack, checkedNodes=None): stack: a list of strings representing a list of nodes in a stack functionality i.e. Last-In-First-Out (LIFO) for sorting the information from each visited node checkedNodes: a list of strings representing the list of the nodes in the traversal - - Output:: - checkedNodes: a list of strings representing the list of the nodes in the traversal + + Output:: + checkedNodes: a list of strings representing the list of the nodes in the traversal - Assumptions: - frames are ordered and are equally distributed in time e.g. hrly satellite images + Assumptions: + frames are ordered and are equally distributed in time e.g. 
hrly satellite images ''' if len(checkedNodes) == len(subGraph): @@ -1043,7 +1055,7 @@ def checkedNodesMCC (prunedGraph, nodeList): ''' Purpose :: Determine if this path is (or is part of) a MCC and provides - preliminary information regarding the stages of the feature + preliminary information regarding the stages of the feature Input:: prunedGraph: a Networkx Graph representing all the cloud clusters @@ -1154,7 +1166,7 @@ def updateMCCList(prunedGraph, potentialMCCList,node,stage, CounterCriteriaAFlag ''' Purpose:: Utility function to determine if a path is (or is part of) a MCC and provides - preliminary information regarding the stages of the feature + preliminary information regarding the stages of the feature Input:: prunedGraph: a Networkx Graph representing all the cloud clusters @@ -1389,7 +1401,7 @@ def maxExtentAndEccentricity(eachList): Output:: maxShieldNode: a string representing the node with the maximum maxShieldNode - definiteMCCFlag: a boolean indicating that the MCC has met all requirements + definiteMCCFlag: a boolean indicating that the MCC has met all requirements ''' maxShieldNode ='' @@ -1581,7 +1593,7 @@ def allAncestors(path, aNode): Input:: path: a list of strings representing the nodes in the path - aNode: a string representing a node to be checked for parents + aNode: a string representing a node to be checked for parents Output:: path: a list of strings representing the list of the nodes connected to aNode through its parents @@ -1607,7 +1619,7 @@ def allDescendants(path, aNode): Input:: path: a list of strings representing the nodes in the path - aNode: a string representing a node to be checked for children + aNode: a string representing a node to be checked for children Output:: path: a list of strings representing the list of the nodes connected to aNode through its children @@ -1653,7 +1665,7 @@ def addNodeBehaviorIdentifier (thisNode, nodeBehaviorIdentifier): Input:: thisNode: a string representing the unique ID of a node - nodeBehaviorIdentifier: a string representing the behavior S- split, M- merge, B- both split and merge, N- neither split or merge + nodeBehaviorIdentifier: a string representing the behavior S- split, M- merge, B- both split and merge, N- neither split or merge Output :: None @@ -1703,9 +1715,9 @@ def updateNodeMCSIdentifier (thisNode, nodeMCSIdentifier): def eccentricity (cloudElementLatLon): ''' Purpose:: - Determines the eccentricity (shape) of contiguous boxes - Values tending to 1 are more circular by definition, whereas - values tending to 0 are more linear + Determines the eccentricity (shape) of contiguous boxes + Values tending to 1 are more circular by definition, whereas + values tending to 0 are more linear Input:: cloudElementLatLon: 2D array in (lat,lon) representing T_bb contiguous squares @@ -1720,29 +1732,29 @@ def eccentricity (cloudElementLatLon): #loop over all lons and determine longest (non-zero) col #loop over all lats and determine longest (non-zero) row for latLon in cloudElementLatLon: - #assign a matrix to determine the legit values - - nonEmptyLons = sum(sum(cloudElementLatLon)>0) - nonEmptyLats = sum(sum(cloudElementLatLon.transpose())>0) - - lonEigenvalues = 1.0 * nonEmptyLats / (nonEmptyLons+0.001) #for long oval on y axis - latEigenvalues = 1.0 * nonEmptyLons / (nonEmptyLats +0.001) #for long oval on x-axs - epsilon = min(latEigenvalues,lonEigenvalues) - + #assign a matrix to determine the legit values + + nonEmptyLons = sum(sum(cloudElementLatLon)>0) + nonEmptyLats = 
sum(sum(cloudElementLatLon.transpose())>0) + + lonEigenvalues = 1.0 * nonEmptyLats / (nonEmptyLons+0.001) #for long oval on y axis + latEigenvalues = 1.0 * nonEmptyLons / (nonEmptyLats +0.001) #for long oval on x-axs + epsilon = min(latEigenvalues,lonEigenvalues) + return epsilon #****************************************************************** def cloudElementOverlap (currentCELatLons, previousCELatLons): ''' Purpose:: - Determines the percentage overlap between two list of lat-lons passed + Determines the percentage overlap between two list of lat-lons passed Input:: - currentCELatLons: a list of tuples for the current CE - previousCELatLons: a list of tuples for the other CE being considered + currentCELatLons: a list of tuples for the current CE + previousCELatLons: a list of tuples for the other CE being considered Output:: - percentageOverlap: a floating-point representing the number of overlapping lat_lon tuples - areaOverlap: a floating-point number representing the area overlapping + percentageOverlap: a floating-point representing the number of overlapping lat_lon tuples + areaOverlap: a floating-point number representing the area overlapping ''' @@ -1829,8 +1841,8 @@ def findCESpeed(node, MCSList): def maenumerate(mArray): ''' Purpose:: - Utility script for returning the actual values from the masked array - Taken from: http://stackoverflow.com/questions/8620798/numpy-ndenumerate-for-masked-arrays + Utility script for returning the actual values from the masked array + Taken from: http://stackoverflow.com/questions/8620798/numpy-ndenumerate-for-masked-arrays Input:: mArray: the masked array returned from the ma.array() command @@ -1844,7 +1856,7 @@ def maenumerate(mArray): mask = ~mArray.mask.ravel() #beware yield fast, but generates a type called "generate" that does not allow for array methods for index, maskedValue in itertools.izip(np.ndenumerate(mArray), mask): - if maskedValue: + if maskedValue: yield index #****************************************************************** def createMainDirectory(mainDirStr): @@ -2046,20 +2058,20 @@ def find_nearest(thisArray,value): def preprocessingMERG(MERGdirname): ''' Purpose:: - Utility script for unzipping and converting the merg*.Z files from Mirador to - NETCDF format. The files end up in a folder called mergNETCDF in the directory - where the raw MERG data is - NOTE: VERY RAW AND DIRTY + Utility script for unzipping and converting the merg*.Z files from Mirador to + NETCDF format. 
The files end up in a folder called mergNETCDF in the directory + where the raw MERG data is + NOTE: VERY RAW AND DIRTY Input:: - Directory to the location of the raw MERG files, preferably zipped + Directory to the location of the raw MERG files, preferably zipped Output:: none Assumptions:: 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) - have been installed on the system and the user can access + have been installed on the system and the user can access 2 User can write files in location where script is being called 3 the files havent been unzipped ''' @@ -2156,20 +2168,20 @@ def postProcessingNetCDF(dataset, dirName = None): TODO: UPDATE TO PICK UP LIMITS FROM FILE FOR THE GRADS SCRIPTS Purpose:: - Utility script displaying the data in generated NETCDF4 files - in GrADS - NOTE: VERY RAW AND DIRTY + Utility script displaying the data in generated NETCDF4 files + in GrADS + NOTE: VERY RAW AND DIRTY Input:: - dataset: integer representing post-processed MERG (1) or TRMM data (2) or original MERG(3) - string: Directory to the location of the raw (MERG) files, preferably zipped + dataset: integer representing post-processed MERG (1) or TRMM data (2) or original MERG(3) + string: Directory to the location of the raw (MERG) files, preferably zipped Output:: images in location as specfied in the code Assumptions:: 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) - have been installed on the system and the user can access + have been installed on the system and the user can access 2 User can write files in location where script is being called ''' @@ -2190,7 +2202,7 @@ def postProcessingNetCDF(dataset, dirName = None): if dataset == 1: var = 'ch4' ctlTitle = 'TITLE MCC search Output Grid: Time lat lon' - ctlLine = 'brightnesstemp=\>ch4 1 t,y,x brightnesstemperature' + ctlLine = 'brightnesstemp=\>ch4 1 t,y,x brightnesstemperature' origsFile = coreDir+"/../GrADSscripts/cs1.gs" gsFile = coreDir+"/../GrADSscripts/cs2.gs" sologsFile = coreDir+"/../GrADSscripts/mergeCE.gs" @@ -2199,7 +2211,7 @@ def postProcessingNetCDF(dataset, dirName = None): elif dataset ==2: var = 'precipAcc' ctlTitle ='TITLE TRMM MCS accumulated precipitation search Output Grid: Time lat lon ' - ctlLine = 'precipitation_Accumulation=\>precipAcc 1 t,y,x precipAccu' + ctlLine = 'precipitation_Accumulation=\>precipAcc 1 t,y,x precipAccu' origsFile = coreDir+"/../GrADSscripts/cs3.gs" gsFile = coreDir+"/../GrADSscripts/cs4.gs" sologsFile = coreDir+"/../GrADSscripts/TRMMCE.gs" @@ -2208,7 +2220,7 @@ def postProcessingNetCDF(dataset, dirName = None): elif dataset ==3: var = 'ch4' ctlTitle = 'TITLE MERG DATA' - ctlLine = 'ch4=\>ch4 1 t,y,x brightnesstemperature' + ctlLine = 'ch4=\>ch4 1 t,y,x brightnesstemperature' origsFile = coreDir+"/../GrADSscripts/cs1.gs" sologsFile = coreDir+"/../GrADSscripts/infrared.gs" lineNum = 54 @@ -2415,15 +2427,15 @@ def tempMaskedImages(imgFilename): To generate temperature-masked images for a first pass verification Input:: - imgFilename: filename for the gif file + imgFilename: filename for the gif file Output:: - None - Gif images for each file of T_bb less than 250K are generated in folder called mergImgs + None - Gif images for each file of T_bb less than 250K are generated in folder called mergImgs Assumptions:: Same as for preprocessingMERG 1 GrADS (http://www.iges.org/grads/gadoc/) and lats4D (http://opengrads.org/doc/scripts/lats4d/) - have been installed on the system and the user can access + have 
been installed on the system and the user can access 2 User can write files in location where script is being called 3 the files havent been unzipped ''' @@ -2445,7 +2457,7 @@ def tempMaskedImages(imgFilename): return #****************************************************************** # -# METRICS FUNCTIONS FOR MERG.PY +# METRICS FUNCTIONS FOR MERG.PY # TODO: rewrite these metrics so that they use the data from the # file instead of the graph as this reduce mem resources needed # @@ -2523,7 +2535,7 @@ def longestDuration(allMCCtimes): Output:: an integer - lenMCC: representing the duration of the longest MCC found - a list of strings - longestMCC: representing the nodes of longest MCC + a list of strings - longestMCC: representing the nodes of longest MCC Assumptions:: @@ -2551,7 +2563,7 @@ def shortestDuration(allMCCtimes): of MCC temporal details for each MCC in the period considered Output::an integer - lenMCC: representing the duration of the shortest MCC found - a list of strings - longestMCC: representing the nodes of shortest MCC + a list of strings - longestMCC: representing the nodes of shortest MCC Assumptions:: @@ -2577,7 +2589,7 @@ def averageDuration(allMCCtimes): of MCC temporal details for each MCC in the period considered Output::a floating-point representing the average duration of a MCC in the period - + Assumptions:: ''' @@ -2614,7 +2626,7 @@ def averageFeatureSize(finalMCCList): Input:: a list of list of strings - finalMCCList: a list of list of nodes representing a MCC Output::a floating-point representing the average area of a MCC in the period - + Assumptions:: ''' @@ -2643,7 +2655,7 @@ def commonFeatureSize(finalMCCList): Output:: a floating-point representing the average area of a MCC in the period - + Assumptions:: ''' @@ -2917,7 +2929,7 @@ def displayPrecip(finalMCCList): title = 'Precipitation distribution of the MCS ' fig,ax = plt.subplots(1, facecolor='white', figsize=(20,7)) - cmap = cm.jet + cmap = plt.jet ax.scatter(x, y, s=partialArea, c= colorBarTime, edgecolors='none', marker='o', cmap =cmap) colorBarTime=[] colorBarTime =list(set(timeList)) @@ -3012,12 +3024,12 @@ def plotPrecipHistograms(finalMCCList): # Set the formatter plt.gca().yaxis.set_major_formatter(formatter) plt.gca().xaxis.set_major_formatter(FormatStrFormatter('%0.0f')) - imgFilename = MAINDIRECTORY+'/images/'+str(thisTime)+eachNode['uniqueID']+'TRMMMCS.gif' - - plt.savefig(imgFilename, transparent=True) - precip =[] - - # ------ NETCDF File get info ------------------------------------ + imgFilename = MAINDIRECTORY+'/images/'+str(thisTime)+eachNode['uniqueID']+'TRMMMCS.gif' + + plt.savefig(imgFilename, transparent=True) + precip =[] + + # ------ NETCDF File get info ------------------------------------ thisFileName = MAINDIRECTORY+'/TRMMnetcdfCEs/TRMM' + str(thisTime).replace(" ", "_") + eachNode['uniqueID'] +'.nc' TRMMData = Dataset(thisFileName,'r', format='NETCDF4') precipRate = TRMMData.variables['precipitation_Accumulation'][:,:,:] @@ -3046,8 +3058,8 @@ def plotHistogram(aList, aTitle, aLabel): Input:: aList: the list of floating points representing values for e.g. averageArea, averageDuration, etc. - aTitle: a string representing the title and the name of the plot e.g. "Average area [km^2]" - aLabel: a string representing the x axis info i.e. the data that is being passed and the units e.g. "Area km^2" + aTitle: a string representing the title and the name of the plot e.g. "Average area [km^2]" + aLabel: a string representing the x axis info i.e. 
@@ -3143,7 +3155,7 @@ def plotAccTRMM (finalMCCList):
        accuPrecipRate += precipRate

    imgFilename = MAINDIRECTORY+'/images/MCSaccu'+firstPartName+str(thisNode['cloudElementTime']).replace(" ", "_")+'.gif'
-   #create new netCDF file
+   # create new netCDF file
    accuTRMMFile = MAINDIRECTORY+'/TRMMnetcdfCEs/accu'+firstPartName+str(thisNode['cloudElementTime']).replace(" ", "_")+'.nc'
    #write the file
    accuTRMMData = Dataset(accuTRMMFile, 'w', format='NETCDF4')

@@ -3190,37 +3202,37 @@ def plotAccTRMM (finalMCCList):
    replaceExpXDef = 'echo XDEF '+ str(nxgrdTRMM) + ' LINEAR ' + str(min(lons)) +' '+ str((max(lons)-min(lons))/nxgrdTRMM) +' >> acc.ctl'
    subprocess.call(replaceExpXDef, shell=True)
    #subprocess.call('echo "XDEF 413 LINEAR -9.984375 0.036378335 " >> acc.ctl', shell=True)
-   #subprocess.call('echo "YDEF 412 LINEAR 5.03515625 0.036378335 " >> acc.ctl', shell=True)
-   replaceExpYDef = 'echo YDEF '+str(nygrdTRMM)+' LINEAR '+str(min(lats))+ ' '+str((max(lats)-min(lats))/nygrdTRMM)+' >>acc.ctl'
-   subprocess.call(replaceExpYDef, shell=True)
-   subprocess.call('echo "ZDEF 01 LEVELS 1" >> acc.ctl', shell=True)
-   subprocess.call('echo "TDEF 99999 linear 31aug2009 1hr" >> acc.ctl', shell=True)
-   #subprocess.call(replaceExpTdef, shell=True)
-   subprocess.call('echo "VARS 1" >> acc.ctl', shell=True)
-   subprocess.call('echo "precipitation_Accumulation=>precipAcc 1 t,y,x precipAccu" >> acc.ctl', shell=True)
-   subprocess.call('echo "ENDVARS" >> acc.ctl', shell=True)
-
-   #generate GrADS script
-   subprocess.call('rm accuTRMM1.gs', shell=True)
-   subprocess.call('touch accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'reinit''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'open acc.ctl ''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'set grads off''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'set mpdset hires''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'set gxout shaded''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'set datawarn off''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'d precipacc''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'draw title TRMM Accumulated Precipitation [mm]''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'run cbarn''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'printim '+imgFilename +' x1000 y800 white''\'" >> accuTRMM1.gs', shell=True)
-   subprocess.call('echo "''\'quit''\'" >> accuTRMM1.gs', shell=True)
-   gradscmd = 'grads -blc ' + '\'run accuTRMM1.gs''\''
-   subprocess.call(gradscmd, shell=True)
-   sys.exit()
-
-   #clean up
-   subprocess.call('rm accuTRMM1.gs', shell=True)
-   subprocess.call('rm acc.ctl', shell=True)
+   #subprocess.call('echo "YDEF 412 LINEAR 5.03515625 0.036378335 " >> acc.ctl', shell=True)
+   replaceExpYDef = 'echo YDEF '+str(nygrdTRMM)+' LINEAR '+str(min(lats))+' '+str((max(lats)-min(lats))/nygrdTRMM)+' >> acc.ctl'
+   subprocess.call(replaceExpYDef, shell=True)
+   subprocess.call('echo "ZDEF 01 LEVELS 1" >> acc.ctl', shell=True)
+   subprocess.call('echo "TDEF 99999 linear 31aug2009 1hr" >> acc.ctl', shell=True)
+   #subprocess.call(replaceExpTdef, shell=True)
+   subprocess.call('echo "VARS 1" >> acc.ctl', shell=True)
+   subprocess.call('echo "precipitation_Accumulation=>precipAcc 1 t,y,x precipAccu" >> acc.ctl', shell=True)
+   subprocess.call('echo "ENDVARS" >> acc.ctl', shell=True)
+
+   #generate GrADS script
+   subprocess.call('rm accuTRMM1.gs', shell=True)
+   subprocess.call('touch accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'reinit''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'open acc.ctl ''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'set grads off''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'set mpdset hires''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'set gxout shaded''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'set datawarn off''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'d precipacc''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'draw title TRMM Accumulated Precipitation [mm]''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'run cbarn''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'printim '+imgFilename+' x1000 y800 white''\'" >> accuTRMM1.gs', shell=True)
+   subprocess.call('echo "''\'quit''\'" >> accuTRMM1.gs', shell=True)
+   gradscmd = 'grads -blc ' + '\'run accuTRMM1.gs''\''
+   subprocess.call(gradscmd, shell=True)
+   sys.exit()
+
+   #clean up
+   subprocess.call('rm accuTRMM1.gs', shell=True)
+   subprocess.call('rm acc.ctl', shell=True)
    return
#******************************************************************
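Two things stand out in the hunk above: the acc.ctl descriptor is built one `echo >> acc.ctl` subprocess per line, and the sys.exit() before the clean-up means the rm calls never actually run. Purely as an illustrative alternative - this is not what the commit does, and writeAccCtl is a made-up name - the same descriptor lines could be appended with ordinary file I/O, avoiding a shell round-trip per line:

    def writeAccCtl(nygrdTRMM, lats, path='acc.ctl'):
        # Illustrative sketch only; covers just the lines shown in the hunk
        # above. nygrdTRMM and lats are the values plotAccTRMM computes.
        # Append mode matches the >> redirections used for the XDEF lines.
        with open(path, 'a') as ctl:
            ctl.write('YDEF %d LINEAR %s %s\n'
                      % (nygrdTRMM, min(lats), (max(lats) - min(lats)) / nygrdTRMM))
            ctl.write('ZDEF 01 LEVELS 1\n')
            ctl.write('TDEF 99999 linear 31aug2009 1hr\n')
            ctl.write('VARS 1\n')
            ctl.write('precipitation_Accumulation=>precipAcc 1 t,y,x precipAccu\n')
            ctl.write('ENDVARS\n')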
@@ -3328,7 +3340,7 @@ def plotAccuInTimeRange(starttime, endtime):
    subprocess.call('echo "ZDEF 01 LEVELS 1" >> acc.ctl', shell=True)
    subprocess.call('echo "TDEF 99999 linear 31aug2009 1hr" >> acc.ctl', shell=True)
    subprocess.call('echo "VARS 1" >> acc.ctl', shell=True)
-   subprocess.call('echo "precipitation_Accumulation=>precipAcc 1 t,y,x precipAccu" >> acc.ctl', shell=True)
+   subprocess.call('echo "precipitation_Accumulation=>precipAcc 1 t,y,x precipAccu" >> acc.ctl', shell=True)
    subprocess.call('echo "ENDVARS" >> acc.ctl', shell=True)
    #generate GrADS script
    imgFilename = MAINDIRECTORY+'/images/accu'+starttime+'-'+endtime+'.gif'

@@ -3715,33 +3727,32 @@ def colorbar_index(ncolors, nlabels, cmap):
    return
#******************************************************************
def cmap_discretize(cmap, N):
-   '''
-   Taken from: http://stackoverflow.com/questions/18704353/correcting-matplotlib-colorbar-ticks
-   http://wiki.scipy.org/Cookbook/Matplotlib/ColormapTransformations
-   Return a discrete colormap from the continuous colormap cmap.
-
-   cmap: colormap instance, eg. cm.jet.
-   N: number of colors.
-
-   Example
-       x = resize(arange(100), (5,100))
-       djet = cmap_discretize(cm.jet, 5)
-       imshow(x, cmap=djet)
-   '''
-
-   if type(cmap) == str:
-       cmap = plt.get_cmap(cmap)
-   colors_i = np.concatenate((np.linspace(0, 1., N), (0.,0.,0.,0.)))
-   colors_rgba = cmap(colors_i)
-   indices = np.linspace(0, 1., N+1)
-   cdict = {}
-   for ki,key in enumerate(('red','green','blue')):
-       cdict[key] = [ (indices[i], colors_rgba[i-1,ki], colors_rgba[i,ki])
-                      for i in xrange(N+1) ]
-   # Return colormap object.
-   return mcolors.LinearSegmentedColormap(cmap.name + "_%d"%N, cdict, 1024)
+   '''
+   Taken from: http://stackoverflow.com/questions/18704353/correcting-matplotlib-colorbar-ticks
+   http://wiki.scipy.org/Cookbook/Matplotlib/ColormapTransformations
+   Return a discrete colormap from the continuous colormap cmap.
+
+   cmap: colormap instance, e.g. cm.jet.
+   N: number of colors.
+
+   Example
+       x = resize(arange(100), (5,100))
+       djet = cmap_discretize(cm.jet, 5)
+       imshow(x, cmap=djet)
+   '''
+
+   if type(cmap) == str:
+       cmap = plt.get_cmap(cmap)
+   colors_i = np.concatenate((np.linspace(0, 1., N), (0., 0., 0., 0.)))
+   colors_rgba = cmap(colors_i)
+   indices = np.linspace(0, 1., N+1)
+   cdict = {}
+   for ki, key in enumerate(('red', 'green', 'blue')):
+       cdict[key] = [(indices[i], colors_rgba[i-1, ki], colors_rgba[i, ki])
+                     for i in xrange(N+1)]
+   # Return colormap object.
+   return mcolors.LinearSegmentedColormap(cmap.name + "_%d" % N, cdict, 1024)
#******************************************************************
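The docstring's own example, spelled out as a runnable snippet (module prefixes made explicit; the output filename is just for illustration):

    import numpy as np
    import matplotlib.pyplot as plt

    x = np.resize(np.arange(100), (5, 100))   # 5 rows sweeping 0..99
    djet = cmap_discretize(plt.cm.jet, 5)     # jet reduced to 5 discrete bins
    plt.imshow(x, cmap=djet)
    plt.colorbar()
    plt.savefig('discretized_jet.png')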
+ + Example + x = resize(arange(100), (5,100)) + djet = cmap_discretize(cm.jet, 5) + imshow(x, cmap=djet) + ''' + + if type(cmap) == str: + cmap = plt.get_cmap(cmap) + colors_i = np.concatenate((np.linspace(0, 1., N), (0.,0.,0.,0.))) + colors_rgba = cmap(colors_i) + indices = np.linspace(0, 1., N+1) + cdict = {} + for ki,key in enumerate(('red','green','blue')): + cdict[key] = [ (indices[i], colors_rgba[i-1,ki], colors_rgba[i,ki]) + for i in xrange(N+1) ] + # Return colormap object. + return mcolors.LinearSegmentedColormap(cmap.name + "_%d"%N, cdict, 1024) #****************************************************************** - http://git-wip-us.apache.org/repos/asf/climate/blob/dc091596/mccsearch/code/mccSearchUI.py ---------------------------------------------------------------------- diff --git a/mccsearch/code/mccSearchUI.py b/mccsearch/code/mccSearchUI.py index 45dc73a..885ff43 100644 --- a/mccsearch/code/mccSearchUI.py +++ b/mccsearch/code/mccSearchUI.py @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ''' # Wizard for running the mccSearch program '''