climate-commits mailing list archives

From prami...@apache.org
Subject svn commit: r1537520 [6/34] - in /incubator/climate/branches/rcmet-2.1.2: ./ src/ src/main/ src/main/python/ src/main/python/bin/ src/main/python/docs/ src/main/python/docs/_static/ src/main/python/docs/_templates/ src/main/python/rcmes/ src/main/pytho...
Date Thu, 31 Oct 2013 14:59:54 GMT
Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py1
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py1?rev=1537520&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py1 (added)
+++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py1 Thu Oct 31 14:59:48 2013
@@ -0,0 +1,359 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+"""Collection of functions used to interface with the database and to create netCDF file
+"""
+import os
+import urllib2
+import re
+import numpy as np
+import numpy.ma as ma
+import json
+import netCDF4
+
+from classes import RCMED
+from toolkit import process
+from datetime import timedelta, datetime
+from calendar import monthrange
+
+def reorderXYT(lons, lats, times, values):
+    # Re-order the values array so that, once reshaped, every point is where it
+    # should be (the DB doesn't necessarily return everything in order).
+    # NB. np.lexsort sorts by its last key first, so times is the primary key.
+    order = np.lexsort((lons, lats, times))
+    counter = 0
+    sortedValues = np.zeros_like(values)
+    sortedLats = np.zeros_like(lats)
+    sortedLons = np.zeros_like(lons)
+    for i in order:
+        sortedValues[counter] = values[i]
+        sortedLats[counter] = lats[i]
+        sortedLons[counter] = lons[i]
+        counter += 1
+    
+    return sortedValues, sortedLats, sortedLons
+
+def findUnique(seq, idfun=None):
+    """
+     Function to find unique values (used in construction of unique datetime list)
+     NB. order preserving
+     Input: seq  - a list of randomly ordered values
+     Output: result - list of ordered values
+    """
+    if idfun is None:
+        def idfun(x): 
+            return x
+
+    seen = {}
+    result = []
+    
+    for item in seq:
+        marker = idfun(item)
+        # in old Python versions:
+        # if seen.has_key(marker)
+        # but in new ones:
+        if marker in seen: continue
+        seen[marker] = 1
+        result.append(item)
+    return result
+
+def get_param_info(url):
+
+    '''
+    This function will query the parameter table for the given URL and return
+    the parameter's general information.
+    '''
+    url = url + "&info=yes"
+    result = urllib2.urlopen(url)
+    datastring = result.read()
+    datastring = json.loads(datastring)
+    database = datastring["database"]
+    timestep = datastring["timestep"]
+    realm = datastring["realm"]
+    instrument = datastring["instrument"]
+    start_date = datastring["start_date"]
+    end_date = datastring["end_date"]
+    unit = datastring["units"]
+    
+    return database, timestep, realm, instrument, start_date, end_date, unit
+
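+# Illustrative (assumed) shape of the JSON payload parsed above; only the keys
+# read by get_param_info() are shown, and the values are placeholders rather
+# than anything taken from this commit:
+#   {"database": "TRMM_pr", "timestep": "monthly", "realm": "atmos",
+#    "instrument": "TRMM", "start_date": "1998-01-01",
+#    "end_date": "2010-12-31", "units": "mm/day"}
+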
+def get_data(url):
+    
+    '''
+    This function will query the database at the given URL and return the
+    datapoints' latitude, longitude, level, time and value.
+    '''
+
+    result = urllib2.urlopen(url)
+    datastring = result.read()
+    d = re.search('data: \r\n', datastring)
+    data = datastring[d.end():len(datastring)]
+    
+    # To create a list of all datapoints
+    data = data.split('\r\n')
+            
+    latitudes = []
+    longitudes = []
+    levels = []
+    values = []
+    timestamps = []
+    
+    # To make a series of lists from datapoints
+    for i in range(len(data)-1):  # Because the last row is empty, "len(data)-1" is used.
+        row = data[i].split(',')
+        latitudes.append(np.float32(row[0]))
+        longitudes.append(np.float32(row[1]))
+        levels.append(np.float32(row[2]))
+        # timestamps are strings so we will leave them alone for now
+        timestamps.append(row[3])
+        values.append(np.float32(row[4]))
+        
+    return latitudes, longitudes, levels, values, timestamps
+    
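+# Assumed shape of the response body parsed by get_data() (row values are
+# illustrative only): everything up to the "data: \r\n" marker is header text,
+# then one "lat,lon,level,time,value" row per datapoint, ending with an empty
+# row, e.g.
+#   data: \r\n
+#   34.5,-118.25,1000.0,2000-01-01 00:00:00,12.3\r\n
+#   34.5,-117.25,1000.0,2000-01-01 00:00:00,11.8\r\n
+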
+
+def create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, startTime, endTime, unit, netCD_fileName):
+    
+    '''
+    This function will generate a netCDF file from the given data.
+    '''
+        
+    # To generate netCDF file from database
+    netcdf = netCDF4.Dataset(netCD_fileName, mode='w')
+    string = ("The netCDF file for parameter: %s, latMin: %s, latMax: %s, lonMin: %s, "
+              "lonMax: %s, startTime: %s and endTime: %s."
+              % (database, latMin, latMax, lonMin, lonMax, startTime, endTime))
+    netcdf.globalAttName = str(string)
+    netcdf.createDimension('dim', len(latitudes))
+    latitude = netcdf.createVariable('lat', 'd', ('dim',))
+    longitude = netcdf.createVariable('lon', 'd', ('dim',))
+    level = netcdf.createVariable('lev', 'd', ('dim',))
+    time = netcdf.createVariable('time', 'd', ('dim',))
+    value = netcdf.createVariable('value', 'd', ('dim',))
+    
+    netcdf.variables['lat'].varAttName = 'latitude'
+    netcdf.variables['lat'].units = 'degrees_north'
+    netcdf.variables['lon'].varAttName = 'longitude'
+    netcdf.variables['lon'].units = 'degrees_east'
+    netcdf.variables['time'].varAttName = 'time'
+    netcdf.variables['time'].units = 'hours since ' + str(startTime)
+    netcdf.variables['value'].varAttName = 'value'
+    netcdf.variables['value'].units = str(unit)
+    netcdf.variables['lev'].varAttName = 'level'
+    netcdf.variables['lev'].units = 'hPa'
+
+    hours = []
+    timeFormat = "%Y-%m-%d %H:%M:%S"
+    base_date = startTime
+    # To convert each date to hours since startTime
+    # (whole days only; timestamps are assumed to fall at 00:00:00)
+    for t in timestamps:
+        date = datetime.strptime(t, timeFormat)
+        diff = date - base_date
+        hours.append(diff.days*24)
+        
+    latitude[:] = latitudes[:]
+    longitude[:] = longitudes[:]
+    level[:] = levels[:]
+    time[:] = hours[:]
+    value[:] = values[:]
+    netcdf.close()
+        
+def read_netcdf(netCD_fileName):
+    
+    '''
+    This function will read the existing netCDF file, convert the hours from the netCDF time variable
+    and return latitudes, longitudes, levels, times and values.
+    '''
+    # To use the created netCDF file
+    netcdf = netCDF4.Dataset(netCD_fileName, mode='r')
+    # To get all data from netCDF file
+    latitudes = netcdf.variables['lat'][:]
+    longitudes = netcdf.variables['lon'][:]
+    levels = netcdf.variables['lev'][:]
+    hours = netcdf.variables['time'][:]
+    values = ma.array(netcdf.variables['value'][:])
+    
+    # To get the base date
+    time_unit = netcdf.variables['time'].units.encode()
+    time_unit = time_unit.split(' ')
+    base_date = time_unit[2] + " " + time_unit[3]
+    
+    netcdf.close()
+    
+    timeFormat = "%Y-%m-%d %H:%M:%S"
+    
+    # Time in the netCDF file is stored as hours since a specific date, so it
+    # needs to be converted back into date strings
+    times = []
+    # To convert the base date to the python datetime format
+    base_date = datetime.strptime(base_date, timeFormat)
+    for each in range(len(hours)): 
+        hour = timedelta(days=hours[each]/24)
+        eachTime = base_date + hour
+        times.append("%04d-%02d-%02d %02d:%02d:%02d" % (eachTime.year, eachTime.month,
+                     eachTime.day, eachTime.hour, eachTime.minute, eachTime.second))
+
+    return latitudes, longitudes, levels, times, values
+
+
+def improve_data(latitudes, longitudes, levels, times, values, timestep):
+    
+    # Make arrays of unique latitudes, longitudes, levels and times
+    uniqueLatitudes = np.unique(latitudes)
+    uniqueLongitudes = np.unique(longitudes)
+    uniqueLevels = np.unique(levels)
+    uniqueTimestamps = np.unique(times)
+    
+    # Count the unique values along each axis
+    uniqueLongitudeCount = len(uniqueLongitudes)
+    uniqueLatitudeCount = len(uniqueLatitudes)
+    uniqueLevelCount = len(uniqueLevels)
+    uniqueTimeCount = len(uniqueTimestamps)
+
+    values, latitudes, longitudes = reorderXYT(longitudes, latitudes, times, values)
+
+    # Convert each unique time from strings into list of Python datetime objects
+    # TODO - LIST COMPS!
+    timeFormat = "%Y-%m-%d %H:%M:%S"
+    timesUnique = [datetime.strptime(t, timeFormat) for t in uniqueTimestamps]
+    timesUnique.sort()
+    timesUnique = process.normalizeDatetimes(timesUnique, timestep)
+
+    # Reshape arrays
+    latitudes = latitudes.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount)
+    longitudes = longitudes.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount)
+    levels = np.array(levels).reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount)
+    values = values.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount)
+
+    # Flatten dimension if only single level
+    if uniqueLevelCount == 1:
+        values = values[:, :, :, 0]
+        latitudes = latitudes[0, :, :, 0]
+        longitudes = longitudes[0, :, :, 0]
+
+    # Create masked array to deal with missing values
+    #  -these make functions like values.mean(), values.max() etc ignore missing values
+    mdi = -9999  # TODO: extract this value from the DB retrieval metadata
+    mdata = ma.masked_array(values, mask=(values == mdi))
+
+
+    return latitudes, longitudes, uniqueLevels, timesUnique, mdata
+    
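+# Shape sketch for improve_data(): with, say, 12 unique times, 90 unique lats,
+# 180 unique lons and a single level (illustrative numbers, not from this
+# commit), mdata comes back with shape (12, 90, 180), latitudes/longitudes as
+# (90, 180) mesh grids, and uniqueLevels stays 1-d.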
+    
+def extractData(datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep):
+    
+    """
+    Main function to extract data from the DB into numpy masked arrays, and also to create monthly netCDF files as a cache
+    
+    Input::
+        datasetID, paramID: required identifiers of data in database
+        latMin, latMax, lonMin, lonMax: location range to extract data for
+        userStartTime, userEndTime: python datetime objects describing required time range to extract
+        cachedir: directory path used to store temporary cache files
+        timestep: "daily" | "monthly" so we can be sure to query the RCMED properly
+    Output::
+        uniqueLatitudes, uniqueLongitudes: 1d-numpy array of latitude and longitude grid values
+        uniqueLevels: 1d-numpy array of vertical level values
+        timesUnique: list of python datetime objects describing times of returned data
+        mdata: masked numpy arrays of data values
+    """
+
+    url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep) 
+    
+    # To get the parameter's information from parameter table
+    database, timestep, realm, instrument, dbStartDate, dbEndDate, unit = get_param_info(url)
+        
+    # Create a directory tree inside the cache directory.
+    # activity and product are fixed values; realm, variable, frequency and
+    # instrument come from the parameter table.
+    activity = "obs4cmip5"
+    product = "observations"
+    variable = database
+    frequency = timestep
+    data_structure = "grid"
+    institution = "NASA"
+    project = "RCMES"
+    version = "v1"
+    name = [activity, product, realm, variable, frequency, data_structure,
+            institution, project, instrument, version]
+    
+    # Create each level of the netCDF cache directory tree if it does not already exist
+    temp_path = cachedir
+    for n in name:
+        path = os.path.join(temp_path, n)
+        if not os.path.exists(path):
+            os.mkdir(path)
+        temp_path = path
+
+    processing_level = 'L3'
+    processing_version = "processing_version"  # the processing version is still unknown and can be added later
+    
+    timeFormat = "%Y-%m-%d %H:%M:%S"
+   
+    date_list, lats, longs, uniqueLevls, uniqueTimes, vals = [], [], [], [], [], []
+
+    # To make a list (date_list) of all months available based on the user's time request
+    while userStartTime <= userEndTime:
+        # To get the beginning of the month
+        beginningOfMonth = str("%04d" % userStartTime.year) + "-" + str("%02d" % userStartTime.month) + "-" + "01 00:00:00"
+        # To get the end of the month
+        endOfMonth = str("%04d" % userStartTime.year) + "-" + str("%02d" % userStartTime.month) + "-" + str(monthrange(userStartTime.year, userStartTime.month)[1]) + " 00:00:00"
+        # To convert both beginning and end of month from string to Python datetime format
+        beginningOfMonth = datetime.strptime(beginningOfMonth, timeFormat)
+        endOfMonth = datetime.strptime(endOfMonth, timeFormat)
+        # To add beginning and end of month as a pair to the date_list list
+        date_list.append([beginningOfMonth, endOfMonth])
+        # To move to the beginning of the next month
+        userStartTime = endOfMonth + timedelta(days=1)
+
+    
+    # To loop over all months and return data
+    for i, date in enumerate(date_list):
+        netCDF_name = variable + '_' + project + '_' + processing_level + '_' + processing_version + '_' + str(latMin) + '_' + str(latMax) + '_' + str(lonMin) + '_' + str(lonMax) + '_' + str("%04d" % date[0].year) + str("%02d" % date[0].month) + '.nc'
+        
+        # If the netCDF cache file already exists, use it
+        if os.path.exists(os.path.join(path, netCDF_name)):
+            latitudes, longitudes, levels, times, values = read_netcdf(os.path.join(path, netCDF_name))
+        
+        # If the netCDF file does not exist, then create one and read it.
+        else:
+            # To query for just one month of data
+            print "%s of %s Database Download(s) Complete" % (i, len(date_list))
+            url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, date[0], date[1], cachedir, timestep)
+            
+            # To get data from DB
+            latitudes, longitudes, levels, values, timestamps = get_data(url)
+            create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, date[0], date[1], unit, os.path.join(path, netCDF_name))
+
+            # To read from the newly created netCDF file
+            latitudes, longitudes, levels, times, values = read_netcdf(os.path.join(path, netCDF_name))
+
+        lats = np.append(lats, latitudes)
+        longs = np.append(longs, longitudes)
+        uniqueLevls = np.append(uniqueLevls, levels)
+        uniqueTimes = np.append(uniqueTimes, times)
+        vals = np.append(vals, values)
+        
+    latitudes, longitudes, uniqueLevels, timesUnique, mdata = improve_data(lats, longs, uniqueLevls, uniqueTimes, vals, timestep)
+        
+    return latitudes, longitudes, uniqueLevels, timesUnique, mdata

Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py1
------------------------------------------------------------------------------
    svn:executable = *
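
For reference, a minimal usage sketch of the module added above, assuming it is
importable as storage.db (the file is committed as db.py1). The dataset and
parameter IDs, bounding box, dates and cache directory are illustrative
placeholders, not values taken from this commit:

    from datetime import datetime
    from storage import db

    lats, lons, levels, times, mdata = db.extractData(
        datasetID=1, paramID=1,    # placeholder RCMED identifiers
        latMin=25.0, latMax=50.0, lonMin=-125.0, lonMax=-65.0,
        userStartTime=datetime(2000, 1, 1), userEndTime=datetime(2000, 12, 31),
        cachedir='/tmp/rcmes_cache', timestep='monthly')
    print mdata.mean()             # masked mean ignores the -9999 fill values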

Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.db
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.db?rev=1537520&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.db (added)
+++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.db Thu Oct 31 14:59:48 2013
@@ -0,0 +1,45 @@
+0a1,16
+> #
+> #  Licensed to the Apache Software Foundation (ASF) under one or more
+> #  contributor license agreements.  See the NOTICE file distributed with
+> #  this work for additional information regarding copyright ownership.
+> #  The ASF licenses this file to You under the Apache License, Version 2.0
+> #  (the "License"); you may not use this file except in compliance with
+> #  the License.  You may obtain a copy of the License at
+> #
+> #      http://www.apache.org/licenses/LICENSE-2.0
+> #
+> #  Unless required by applicable law or agreed to in writing, software
+> #  distributed under the License is distributed on an "AS IS" BASIS,
+> #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+> #  See the License for the specific language governing permissions and
+> #  limitations under the License.
+> #
+115c131
+<     netcdf =  netCDF4.Dataset(netCD_fileName,'w')
+---
+>     netcdf =  netCDF4.Dataset(netCD_fileName,mode='w')
+165c181
+<     values = netcdf.variables['value'][:]
+---
+>     values = ma.array(netcdf.variables['value'][:])
+281a298
+>     temp_path = cachedir
+283c300
+<         path = os.path.join(cachedir, n)
+---
+>         path = os.path.join(temp_path, n)
+284a302
+>             temp_path = path
+287a306
+>             temp_path = path
+310c329
+<     print 'Starting retrieval data (this may take several minutes) ...... ' 
+---
+>     
+312c331
+<     for date in date_list:
+---
+>     for i, date in enumerate(date_list):
+321a341
+>             print "%s of %s Database Download(s) Complete" % (i, len(date_list))  

Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.db
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.files
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.files?rev=1537520&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.files (added)
+++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.files Thu Oct 31 14:59:48 2013
@@ -0,0 +1,397 @@
+0a1,16
+> #
+> #  Licensed to the Apache Software Foundation (ASF) under one or more
+> #  contributor license agreements.  See the NOTICE file distributed with
+> #  this work for additional information regarding copyright ownership.
+> #  The ASF licenses this file to You under the Apache License, Version 2.0
+> #  (the "License"); you may not use this file except in compliance with
+> #  the License.  You may obtain a copy of the License at
+> #
+> #      http://www.apache.org/licenses/LICENSE-2.0
+> #
+> #  Unless required by applicable law or agreed to in writing, software
+> #  distributed under the License is distributed on an "AS IS" BASIS,
+> #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+> #  See the License for the specific language governing permissions and
+> #  limitations under the License.
+> #
+2,3c18,22
+< Module for handling data input files.
+< This module can easily open NetCDF, HDF and Grib files.
+---
+> Module for handling data input files.  Requires netCDF and Numpy be 
+> installed.
+> 
+> This module can easily open NetCDF, HDF and Grib files.  Search the netCDF4
+> documentation for a complete list of supported formats.
+7d25
+< 
+118,121d135
+<     lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.  # convert to -180,180 if necessary
+< 
+<     """TODO:  Guard against case where latsraw and lonsraw are not the same dim?"""
+<    
+145c159
+<     for ifile in filelist:
+---
+>     for i, ifile in enumerate(filelist):
+149c163
+<         t2raw = f.variables[myvar][:]
+---
+>         t2raw = ma.array(f.variables[myvar][:])
+171c185
+<         timesaccu = timesaccu + ntimes
+---
+>         timesaccu += ntimes
+173,174c187
+<         i += 1 
+<       
+---
+>         
+186,191c199,202
+<     data_dict = {}
+<     data_dict['lats'] = lat
+<     data_dict['lons'] = lon
+<     data_dict['times'] = timestore
+<     data_dict['data'] = t2store
+<     #return lat, lon, timestore, t2store
+---
+>     # Make sure latlon grid is monotonically increasing and that the domains
+>     # are correct
+>     lat, lon, t2store = checkLatLon(lat, lon, t2store)
+>     data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store}
+200c211
+<          fmt - (optional) specify fileformat for PyNIO if filename suffix is non-standard
+---
+>          fmt - (optional) specify fileformat for netCDF4 if filename suffix is non-standard
+214c225
+<         f = netCDF4.Dataset(myfile, mode='r',format=fmt)
+---
+>         f = netCDF4.Dataset(myfile, mode='r', format=fmt)
+242,244c253,254
+<     f = netCDF4.Dataset(myfile, mode='r',format='NETCDF4')
+<     
+<     keylist = f.variables.keys()
+---
+>     f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4')
+>     keylist = [key.encode().lower() for key in f.variables.keys()]
+261c271
+< def read_lolaT_from_file(filename, latVarName, lonVarName, timeVarName, file_type):
+---
+> def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, file_type):
+263c273,274
+<     Function that will return lat, lon, and time arrays
+---
+>     Purpose::
+>         Read in data from one file at a time
+265,267c276,278
+<     Input::
+<         filename - the file to inspect
+<         latVarName - name of the Latitude Variable
+---
+>     Input::   
+>         filelist - list of filenames (including path)
+>         myvar - string containing name of variable to load in (as it appears in file)s
+270,277c281
+<         fileType = type of file we are trying to parse
+<     
+<     Output::
+<         lat - MESH GRID of Latitude values with shape (nx, ny)
+<         lon - MESH GRID of Longitude values with shape (nx, ny)
+<         timestore - Python list of Datetime objects
+<         
+<         MESHGRID docs: http://docs.scipy.org/doc/numpy/reference/generated/numpy.meshgrid.html
+---
+>         fileType - type of file we are trying to parse
+279,308c283,288
+<     """
+< 
+<     tmp = netCDF4.Dataset(filename, mode='r',format=file_type)
+<     lonsraw = tmp.variables[lonVarName][:]
+<     latsraw = tmp.variables[latVarName][:]
+<     lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.  # convert to -180,180 if necessary
+<     if(latsraw.ndim == 1):
+<         lon, lat = np.meshgrid(lonsraw, latsraw)
+<     if(latsraw.ndim == 2):
+<         lon = lonsraw
+<         lat = latsraw
+<     timestore, _ = process.getModelTimes(filename, timeVarName)
+<     print '  read_lolaT_from_file: Lats, lons and times read in for the model domain'
+<     return lat, lon, timestore
+< 
+< def read_data_from_one_file(ifile, myvar, timeVarName, lat, file_type):
+<     ##################################################################################
+<     # Read in data from one file at a time
+<     # Input:   filelist - list of filenames (including path)
+<     #          myvar    - string containing name of variable to load in (as it appears in file)
+<     # Output:  lat, lon - 2D array of latitude and longitude values
+<     #          times    - list of times
+<     #          t2store  - numpy array containing data from all files    
+<     # Modified from read_data_from_file_list to read data from multiple models into a 4-D array
+<     # 1. The code now processes model data that completely covers the 20-yr period. Thus,
+<     #    all model data must have the same time levels (ntimes). Unlike in the oroginal, ntimes
+<     #    is fixed here.
+<     # 2. Because one of the model data exceeds 240 mos (243 mos), the model data must be
+<     #    truncated to the 240 mons using the ntimes determined from the first file.
+<     ##################################################################################
+---
+>      Output::  
+>         lat, lon - 2D arrays of latitude and longitude values
+>         times - list of times
+>         t2store - numpy array containing data from the file for the requested variable
+>         varUnit - units for the variable given by t2store  
+>     """           
+314c294
+<     t2raw = f.variables[myvar][:]
+---
+>     t2raw = ma.array(f.variables[myvar][:])
+318,319c298,306
+<     t2tmp = ma.array(t2tmp)
+<  
+---
+>         
+>     lonsraw = f.variables[lonVarName][:]
+>     latsraw = f.variables[latVarName][:]
+>     if(latsraw.ndim == 1):
+>         lon, lat = np.meshgrid(lonsraw, latsraw)
+>     if(latsraw.ndim == 2):
+>         lon = lonsraw
+>         lat = latsraw
+>     
+323c310,314
+<     return timestore, t2tmp, varUnit
+---
+>     
+>     # Make sure latlon grid is monotonically increasing and that the domains
+>     # are correct
+>     lat, lon, t2store = checkLatLon(lat, lon, t2tmp)
+>     return lat, lon, timestore, t2store, varUnit
+444c435,436
+<     lonsraw[lonsraw > 180] = lonsraw[lonsraw > 180] - 360.  # convert to -180,180 if necessary
+---
+>     timesraw = tmp.variables[timeVarName]
+>     
+447,450c439,442
+<     if(latsraw.ndim == 2):
+<         lon = lonsraw; lat = latsraw
+<     
+<     timesraw = tmp.variables[timeVarName]
+---
+>         
+>     elif(latsraw.ndim == 2):
+>         lon = lonsraw
+>         lat = latsraw
+466,467c458
+<     i = 0
+<     for ifile in filelist:
+---
+>     for i, ifile in enumerate(filelist):
+470c461
+<         t2raw = f.variables[myvar][:]
+---
+>         t2raw = ma.array(f.variables[myvar][:])
+472d462
+<         time = timesraw[0:ntimes]
+488d477
+<         i += 1 
+495c484,487
+<     
+---
+> 
+>     # Make sure latlon grid is monotonically increasing and that the domains
+>     # are correct
+>     lat, lon, t2store = checkLatLon(lat, lon, t2store)
+663,719d654
+< def writeNCfile1(fileName, numSubRgn, lons, lats, allData, datRgnAvg, datList, subRegions):
+<     # write an output file of variables up to 3 dimensions
+<     # fileName: the name of the output data file
+<     # numSubRgn  : the number of subregions
+<     # lons[ngrdX]: longitude
+<     # lats[ngrdY]: latitudes
+<     # allData[nT,ngrdY,ngrdX]: the obs+mdl time series of the entire model domain
+<     # datRgnAvg[numSubRgn,nT]: the obs+mdl time series for the all subregions
+<     dimD, dimT, dimY, dimX = allData.shape
+<     #dimD = allData.shape[0]      # the number of obs + mdl datasets
+<     #dimT = allData.shape[1]      # the number of time levels
+<     #dimY = allData.shape[2]      # y-dimension
+<     #dimX = allData.shape[3]      # x-dimension
+<     dimR = datRgnAvg.shape[1]    # the number of subregions
+<     f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
+<     print datRgnAvg.shape, dimD, dimR, dimT
+<     #create global attributes
+<     f.description = ''
+<     # create dimensions
+<     print 'Creating Dimensions within the NetCDF Object...'
+<     f.createDimension('unity', 1)
+<     f.createDimension('time', dimT)
+<     f.createDimension('west_east', dimX)
+<     f.createDimension('south_north', dimY)
+<     f.createDimension('data', dimD)
+< 
+<     # create the variable (real*4) to be written in the file
+<     print 'Creating Variables...'
+<     f.createVariable('lon', 'd', ('south_north', 'west_east'))
+<     f.createVariable('lat', 'd', ('south_north', 'west_east'))
+<     f.createVariable('gDat', 'd', ('data', 'time', 'south_north', 'west_east'))
+<     
+<     if subRegions:
+<         f.createDimension('regions', dimR)
+<         f.createVariable('dRgn', 'd', ('data', 'regions', 'time'))
+<         f.variables['dRgn'].varAttName = 'Subregion-mean time series: All (obs + model) datasets'
+< 
+<     loadDataIntoNetCDF1(f, datList, allData)
+<     print 'Loaded all data into the NetCDF'
+< 
+<     # create attributes and units for the variable
+<     print 'Creating Attributes and Units...'
+<     f.variables['lon'].varAttName = 'Longitudes'
+<     f.variables['lon'].varUnit = 'degrees East'
+<     f.variables['lat'].varAttName = 'Latitudes'
+<     f.variables['lat'].varUnit = 'degrees North'
+<     f.variables['gDat'].varAttName = 'Gridded data time series: entire domain'
+< 
+<     # assign the values to the variable and write it
+<     f.variables['lon'][:] = lons[:]
+<     f.variables['lat'][:] = lats[:]
+<     f.variables['gDat'][:] = allData[:]
+<     if subRegions:
+<         f.variables['dRgn'][:] = datRgnAvg[:]
+< 
+<     f.close()
+< 
+731c666
+<     for dataset in datasets:
+---
+>     for datasetCount, dataset in enumerate(datasets):
+740,741c675
+<         fileObject.variables[datasetName][:]=dataArray[datasetCount,:,:,:]
+<         datasetCount += 1
+---
+>         fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:]
+743c677
+< def loadDataIntoNetCDF1(fileObject, datasets, dataArray):
+---
+> def checkLatLon(latsin, lonsin, datain):
+744a679,683
+>     Purpose::
+>         Checks whether latitudes and longitudes are monotonically increasing
+>         within the domains [-90, 90) and [-180, 180) respectively, and rearranges the input data
+>         accordingly if they are not.
+>     
+746,748c685,689
+<         fileObject - PyNIO file object data will be loaded into
+<         datasets - List of dataset names
+<         dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
+---
+>         latsin - Array of latitudes read from a raw netcdf file
+>         lonsin - Array of longitudes read from a raw netcdf file
+>         datain  - Array of data values read from a raw netcdf file.
+>                    The shape is assumed to be (..., nLat, nLon).
+>         
+750c691,734
+<         No return value.  PyNIO file object is updated in place
+---
+>         latsout - 2D array of (rearranged) latitudes
+>         lonsout - 2D array of (rearranged) longitudes
+>         dataout - Array of (rearranged) data
+>     """
+>     # Avoid unnecessary shifting if all lons are higher than 180
+>     if lonsin.min() > 180:
+>         lonsin -= 360
+>         
+>     # Make sure lats and lons are monotonically increasing
+>     latsDecreasing = np.diff(latsin[:, 0]) < 0
+>     lonsDecreasing = np.diff(lonsin[0]) < 0
+>     
+>     # If all values are decreasing then they just need to be reversed
+>     latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all()
+>     
+>     # If the lat values are unsorted then raise an exception
+>     if not latsReversed and latsDecreasing.any():
+>         raise ValueError('Latitudes must be monotonically increasing.')
+>     
+>     # Perform same checks now for lons
+>     if not lonsReversed and lonsDecreasing.any():
+>         raise ValueError('Longitudes must be monotonically increasing.')
+>     
+>     # Also check if lons go from [0, 360), and convert to [-180, 180)
+>     # if necessary
+>     lonsShifted = lonsin.max() > 180
+>     latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:]
+>     # Now correct data if latlon grid needs to be shifted    
+>     if latsReversed:
+>         latsout = latsout[::-1]
+>         dataout = dataout[..., ::-1, :]
+>         
+>     if lonsReversed:
+>         lonsout = lonsout[..., ::-1]
+>         dataout = dataout[..., ::-1]
+>         
+>     if lonsShifted:
+>         lat1d = latsout[:, 0]
+>         dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False)
+>         lonsout, latsout = np.meshgrid(lon1d, lat1d) 
+>         
+>     return latsout, lonsout, dataout
+>     
+> def shiftgrid(lon0, datain, lonsin, start= True, cyclic=360.0):
+752,760c736,783
+<     datasetCount = 0
+<     for dataset in datasets:
+<         datasetName = path.splitext(path.basename(dataset))[0]
+<         print "Creating variable %s" % datasetName
+<         fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
+<         fileObject.variables[datasetName].varAttName = 'Obseration time series: entire domain'
+<         print 'Loading values into %s' % datasetName
+<         fileObject.variables[datasetName][:]=dataArray[datasetCount,:,:,:]
+<         datasetCount += 1
+---
+>     Purpose::
+>         Shift global lat/lon grid east or west. This function is taken directly
+>         from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not
+>         currently support arrays with more than two dimensions.
+>         https://github.com/matplotlib/basemap
+>         
+>     Input::
+>         lon0 - starting longitude for shifted grid (ending longitude if start=False). 
+>                lon0 must be on input grid (within the range of lonsin).
+>         datain - original data with longitude the right-most dimension.
+>         lonsin - original longitudes.
+>         start  - if True, lon0 represents the starting longitude of the new grid. 
+>                  if False, lon0 is the ending longitude. Default True.
+>         cyclic - width of periodic domain (default 360)
+> 
+>     Output:: 
+>         dataout - data on shifted grid
+>         lonsout - lons on shifted grid
+>     """
+>     if np.fabs(lonsin[-1]-lonsin[0]-cyclic) > 1.e-4:
+>         # Use all data instead of raise ValueError, 'cyclic point not included'
+>         start_idx = 0
+>     else:
+>         # If cyclic, remove the duplicate point
+>         start_idx = 1
+>     if lon0 < lonsin[0] or lon0 > lonsin[-1]:
+>         raise ValueError('lon0 outside of range of lonsin')
+>     i0 = np.argmin(np.fabs(lonsin-lon0))
+>     i0_shift = len(lonsin)-i0
+>     if ma.isMA(datain):
+>         dataout  = ma.zeros(datain.shape,datain.dtype)
+>     else:
+>         dataout  = np.zeros(datain.shape,datain.dtype)
+>     if ma.isMA(lonsin):
+>         lonsout = ma.zeros(lonsin.shape,lonsin.dtype)
+>     else:
+>         lonsout = np.zeros(lonsin.shape,lonsin.dtype)
+>     if start:
+>         lonsout[0:i0_shift] = lonsin[i0:]
+>     else:
+>         lonsout[0:i0_shift] = lonsin[i0:]-cyclic
+>     dataout[...,0:i0_shift] = datain[...,i0:]
+>     if start:
+>         lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]+cyclic
+>     else:
+>         lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]
+>     dataout[...,i0_shift:] = datain[...,start_idx:i0+start_idx]
+>     return dataout,lonsout
+\ No newline at end of file

Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.files
------------------------------------------------------------------------------
    svn:executable = *
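
A small sketch of the shiftgrid() routine recorded in the diff above, assuming
it is importable from the updated files module; the toy grid values are
illustrative only:

    import numpy as np
    from storage import files

    lons = np.arange(0., 360., 10.)      # 0..350, no cyclic point included
    data = np.tile(lons, (5, 1))         # (nlat, nlon), longitude right-most
    dataout, lonsout = files.shiftgrid(180., data, lons, start=False)
    print lonsout.min(), lonsout.max()   # grid now runs -180..170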

Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.rcmed
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.rcmed?rev=1537520&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.rcmed (added)
+++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.rcmed Thu Oct 31 14:59:48 2013
@@ -0,0 +1,17 @@
+0a1,16
+> #
+> #  Licensed to the Apache Software Foundation (ASF) under one or more
+> #  contributor license agreements.  See the NOTICE file distributed with
+> #  this work for additional information regarding copyright ownership.
+> #  The ASF licenses this file to You under the Apache License, Version 2.0
+> #  (the "License"); you may not use this file except in compliance with
+> #  the License.  You may obtain a copy of the License at
+> #
+> #      http://www.apache.org/licenses/LICENSE-2.0
+> #
+> #  Unless required by applicable law or agreed to in writing, software
+> #  distributed under the License is distributed on an "AS IS" BASIS,
+> #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+> #  See the License for the specific language governing permissions and
+> #  limitations under the License.
+> #

Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/dff.rcmed
------------------------------------------------------------------------------
    svn:executable = *

Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/files.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/files.py?rev=1537520&view=auto
==============================================================================
--- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/files.py (added)
+++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/files.py Thu Oct 31 14:59:48 2013
@@ -0,0 +1,861 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+"""
+Module for handling data input files.  Requires netCDF4 and NumPy to be
+installed.
+
+This module can easily open NetCDF, HDF and Grib files.  Search the netCDF4
+documentation for a complete list of supported formats.
+"""
+
+from os import path
+import netCDF4
+import numpy as np
+import numpy.ma as ma
+import sys
+
+from toolkit import process
+from utils import fortranfile
+from utils import misc
+
+
+VARIABLE_NAMES = {'time': ['time', 'times', 'date', 'dates', 'julian'],
+                  'latitude': ['latitude', 'lat', 'lats', 'latitudes'],
+                  'longitude': ['longitude', 'lon', 'lons', 'longitudes']
+                  }
+
+
+def findunique(seq):
+    keys = {}
+    for e in seq:
+        keys[e] = 1
+    return keys.keys()
+
+def getVariableByType(filename, variableType):
+    """
+    Function that will try to return the variable from a file based on a provided
+    parameter type.
+    
+    Input::
+        filename - the file to inspect
+        variableType - time | latitude | longitude
+    
+    Output::
+        variable name OR list of all variables in the file if a single variable
+        name match cannot be found.
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print "netCDF4Error:", sys.exc_info()[0]
+        raise
+    
+    variableKeys = f.variables.keys()
+    f.close()
+    variableKeys = [variable.encode().lower() for variable in variableKeys]
+    variableMatch = VARIABLE_NAMES[variableType]
+
+    commonVariables = list(set(variableKeys).intersection(variableMatch)) 
+
+    if len(commonVariables) == 1:
+        return str(commonVariables[0])
+    
+    else:
+        return variableKeys
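+
+# Example (hypothetical file): getVariableByType('model.nc', 'latitude')
+# returns 'lat' when exactly one candidate from VARIABLE_NAMES['latitude'] is
+# present; otherwise the full lower-cased variable name list is returned so
+# the caller can prompt the user.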
+
+def getVariableRange(filename, variableName):
+    """
+    Function to return the min and max values of the given variable in
+    the supplied filename.
+   
+    Input::
+        filename - absolute path to a file
+        variableName - variable whose min and max values should be returned
+
+    Output::
+        variableRange - tuple of order (variableMin, variableMax)
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print "netCDF4Error:", sys.exc_info()[0]
+        raise
+    
+    varArray = f.variables[variableName][:]
+    f.close()
+    return (varArray.min(), varArray.max())
+
+
+def read_data_from_file_list(filelist, myvar, timeVarName, latVarName, lonVarName):
+    '''
+    Read in data from a list of model files into a single data structure
+   
+    Input:
+       filelist - list of filenames (including path)
+       myvar    - string containing name of variable to load in (as it appears in file)
+    Output:
+       lat, lon - 2D array of latitude and longitude values
+       timestore    - list of times
+       t2store  - numpy array containing data from all files    
+   
+     NB. originally written specific for WRF netCDF output files
+         modified to make more general (Feb 2011)
+   
+      Peter Lean July 2010 
+    '''
+
+    filelist.sort()
+    # Crash nicely if 'filelist' is zero length (check before indexing into it)
+    """TODO:  Throw Error instead via try Except"""
+    if len(filelist) == 0:
+        print 'Error: no files have been passed to read_data_from_file_list()'
+        sys.exit()
+
+    filename = filelist[0]
+
+    # Open the first file in the list to:
+    #    i) read in lats, lons
+    #    ii) find out how many timesteps in the file 
+    #        (assume same ntimes in each file in list)
+    #     -allows you to create an empty array to store variable data for all times
+    tmp = netCDF4.Dataset(filename, mode='r')
+    latsraw = tmp.variables[latVarName][:]
+    lonsraw = tmp.variables[lonVarName][:]
+    if(latsraw.ndim == 1):
+        lon, lat = np.meshgrid(lonsraw, latsraw)
+    if(latsraw.ndim == 2):
+        lon = lonsraw
+        lat = latsraw
+
+    timesraw = tmp.variables[timeVarName]
+    ntimes = len(timesraw)
+    tmp.close()
+    
+    print 'Lats and lons read in for first file in filelist'
+
+    # Create a single empty masked array to store model data from all files
+    t2store = ma.zeros((ntimes * len(filelist), len(lat[:, 0]), len(lon[0, :])))
+    timestore = ma.zeros((ntimes * len(filelist))) 
+    
+    # Now load in the data for real
+    #  NB. no need to reload in the latitudes and longitudes -assume invariant
+    timesaccu = 0  # a counter for number of times stored so far in t2store
+    #  NB. this method allows for missing times in data files 
+    #      as no assumption made that same number of times in each file...
+
+
+    for i, ifile in enumerate(filelist):
+
+        #print 'Loading data from file: ',filelist[i]
+        f = netCDF4.Dataset(ifile, mode='r')
+        t2raw = ma.array(f.variables[myvar][:])
+        timesraw = f.variables[timeVarName]
+        time = timesraw[:]
+        ntimes = len(time)
+        print 'file= ', i, 'ntimes= ', ntimes, filelist[i]
+        print 't2raw shape: ', t2raw.shape
+        
+        # Flatten dimensions which needn't exist, i.e. level 
+        #   e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do.
+        #  Code requires data to have dimensions, (time,lat,lon)
+        #    i.e. remove level dimensions
+        # Remove 1d axis from the t2raw array
+        # Example: t2raw.shape == (365, 180, 360, 1) <maps to (time, lat, lon, height)>
+        # After the squeeze you will be left with (365, 180, 360) instead
+        t2tmp = t2raw.squeeze()
+        # Nb. if this happens to be data for a single time only, then we just flattened it by accident
+        #     lets put it back... 
+        if t2tmp.ndim == 2:
+            t2tmp = np.expand_dims(t2tmp, 0)
+
+        t2store[timesaccu + np.arange(ntimes), :, :] = t2tmp[:, :, :]
+        timestore[timesaccu + np.arange(ntimes)] = time
+        timesaccu += ntimes
+        f.close()
+        
+    print 'Data read in successfully with dimensions: ', t2store.shape
+    
+    # TODO: search for duplicated entries (same time) and remove duplicates.
+    # Check to see if number of unique times == number of times, if so then no problem
+
+    if(len(np.unique(timestore)) != len(np.where(timestore != 0)[0].view())):
+        print 'WARNING: Possible duplicated times'
+
+    # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here
+    timestore, _ = process.getModelTimes(filename, timeVarName)
+    
+    # Make sure latlon grid is monotonically increasing and that the domains
+    # are correct
+    lat, lon, t2store = checkLatLon(lat, lon, t2store)
+    data_dict = {'lats': lat, 'lons': lon, 'times': timestore, 'data': t2store}
+    return data_dict
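+
+# Example (hypothetical filenames/variable name): read_data_from_file_list(
+# wrf_files, 't2', 'time', 'lat', 'lon') yields a dict whose 'data' entry has
+# shape (total_times, nlat, nlon) and whose 'times' entry is the list of
+# datetime objects decoded by process.getModelTimes().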
+
+def select_var_from_file(myfile, fmt='not set'):
+    '''
+     Routine to act as user interface to allow users to select variable of interest from a file.
+     
+      Input:
+         myfile - filename
+         fmt - (optional) specify fileformat for netCDF4 if filename suffix is non-standard
+    
+      Output:
+         myvar - variable name in file
+    
+        Peter Lean  September 2010
+    '''
+
+    print fmt
+    
+    if fmt == 'not set':
+        f = netCDF4.Dataset(myfile, mode='r')
+    
+    if fmt != 'not set':
+        f = netCDF4.Dataset(myfile, mode='r', format=fmt)
+    
+    keylist = [key.encode().lower() for key in f.variables.keys()]
+    
+    for i, v in enumerate(keylist):
+        print '[', i, '] ', f.variables[v].long_name, ' (', v, ')'
+
+    user_selection = raw_input('Please select variable : [0 -' + str(len(keylist) - 1) + ']  ')
+    
+    myvar = keylist[int(user_selection)]
+    f.close()
+    
+    return myvar
+
+def select_var_from_wrf_file(myfile):
+    '''
+     Routine to act as user interface to allow users to select variable of interest from a wrf netCDF file.
+     
+      Input:
+         myfile - filename
+    
+      Output:
+         mywrfvar - variable name in wrf file
+    
+        Peter Lean  September 2010
+    '''
+
+    f = netCDF4.Dataset(myfile, mode='r', format='NETCDF4')
+    keylist = [key.encode().lower() for key in f.variables.keys()]
+
+    for i, v in enumerate(keylist):
+        try:
+            print '[', i, '] ', f.variables[v].description, ' (', v, ')'
+        except:
+            print ''
+    
+    user_selection = raw_input('Please select WRF variable : [0 -' + str(len(keylist) - 1) + ']  ')
+    
+    mywrfvar = keylist[int(user_selection)]
+    f.close()
+    
+    return mywrfvar
+
+def read_data_from_one_file(ifile, myvar, latVarName, lonVarName, timeVarName, file_type):
+    """
+    Purpose::
+        Read in data from one file at a time
+    
+    Input::   
+        ifile - file name (including path)
+        myvar - string containing name of variable to load in (as it appears in the file)
+        latVarName - name of the Latitude Variable
+        lonVarName - name of the Longitude Variable
+        timeVarName - name of the Time Variable
+        file_type - type of file we are trying to parse
+     Output::  
+        lat, lon - 2D arrays of latitude and longitude values
+        times - list of times
+        t2store - numpy array containing data from the file for the requested variable
+        varUnit - units for the variable given by t2store  
+    """           
+    f = netCDF4.Dataset(ifile, mode='r')
+    try:
+        varUnit = f.variables[myvar].units.encode().upper()
+    except:
+        varUnit = raw_input('Enter the model variable unit: \n> ').upper()
+    t2raw = ma.array(f.variables[myvar][:])
+    t2tmp = t2raw.squeeze()
+    if t2tmp.ndim == 2:
+        t2tmp = np.expand_dims(t2tmp, 0)
+        
+    lonsraw = f.variables[lonVarName][:]
+    latsraw = f.variables[latVarName][:]
+    if(latsraw.ndim == 1):
+        lon, lat = np.meshgrid(lonsraw, latsraw)
+    if(latsraw.ndim == 2):
+        lon = lonsraw
+        lat = latsraw
+    
+    f.close()
+    print '  success read_data_from_one_file: VarName=', myvar, ' Shape(Full)= ', t2tmp.shape, ' Unit= ', varUnit
+    timestore = process.decode_model_timesK(ifile, timeVarName, file_type)
+    
+    # Make sure latlon grid is monotonically increasing and that the domains
+    # are correct
+    lat, lon, t2store = checkLatLon(lat, lon, t2tmp)
+    return lat, lon, timestore, t2store, varUnit
+
+def findTimeVariable(filename):
+    """
+     Function to find what the time variable is called in a model file.
+        Input::
+            filename - file to crack open and check for a time variable
+        Output::
+            timeName - name of the input file's time variable
+            variableNameList - list of variable names from the input filename
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print("Unable to open '%s' to try and read the Time variable" % filename)
+        raise
+
+    variableNameList = [variable.encode() for variable in f.variables.keys()]
+    f.close()
+    # convert all variable names into lower case
+    varNameListLowerCase = [x.lower() for x in variableNameList]
+
+    # Use "set" types for finding common variable name from in the file and from the list of possibilities
+    possibleTimeNames = set(['time', 'times', 'date', 'dates', 'julian'])
+    
+    # Use the sets to find the intersection where variable names are in possibleNames
+    timeNameSet = possibleTimeNames.intersection(varNameListLowerCase)
+    
+    if len(timeNameSet) == 0:
+        print("Unable to autodetect the Time Variable Name in the '%s'" % filename)
+        timeName = misc.askUserForVariableName(variableNameList, targetName ="Time")
+    
+    else:
+        timeName = timeNameSet.pop()
+    
+    return timeName, variableNameList
+
+
+def findLatLonVarFromFile(filename):
+    """
+    Function to find what the latitude and longitude variables are called in a model file.
+    
+    Input:: 
+        -filename 
+    Output::
+        -latVarName
+        -lonVarName
+        -latMin 
+        -latMax
+        -lonMin
+        -lonMax
+    """
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+    except:
+        print("Unable to open '%s' to try and read the Latitude and Longitude variables" % filename)
+        raise
+
+    variableNameList = [variable.encode() for variable in f.variables.keys()]
+    # convert all variable names into lower case
+    varNameListLowerCase = [x.lower() for x in variableNameList]
+
+    # Use "set" types for finding common variable name from in the file and from the list of possibilities
+    possibleLatNames = set(['latitude', 'lat', 'lats', 'latitudes'])
+    possibleLonNames = set(['longitude', 'lon', 'lons', 'longitudes'])
+    
+    # Use the sets to find the intersection where variable names are in possibleNames
+    latNameSet = possibleLatNames.intersection(varNameListLowerCase)
+    lonNameSet = possibleLonNames.intersection(varNameListLowerCase)
+    
+    if len(latNameSet) == 0 or len(lonNameSet) == 0:
+        print("Unable to autodetect Latitude and/or Longitude values in the file")
+        latName = misc.askUserForVariableName(variableNameList, targetName ="Latitude")
+        lonName = misc.askUserForVariableName(variableNameList, targetName ="Longitude")
+    
+    else:
+        latName = latNameSet.pop()
+        lonName = lonNameSet.pop()
+    
+    lats = np.array(f.variables[latName][:])
+    lons = np.array(f.variables[lonName][:])
+    f.close()
+    
+    latMin = lats.min()
+    latMax = lats.max()
+    
+    # Convert the lons from 0:360 into -180:180
+    lons[lons > 180] = lons[lons > 180] - 360.
+    lonMin = lons.min()
+    lonMax = lons.max()
+
+    return latName, lonName, latMin, latMax, lonMin, lonMax
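+
+# Example (hypothetical file): findLatLonVarFromFile('model.nc') might return
+# ('lat', 'lon', 20.0, 55.0, -130.0, -60.0); the variable names are resolved
+# from the candidate sets above, and longitudes are already shifted into the
+# -180..180 range.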
+
+
+def read_data_from_file_list_K(filelist, myvar, timeVarName, latVarName, lonVarName, file_type):
+    ##################################################################################
+    # Read in data from a list of model files into a single data structure
+    # Input:   filelist - list of filenames (including path)
+    #          myvar    - string containing name of variable to load in (as it appears in file)
+    # Output:  lat, lon - 2D array of latitude and longitude values
+    #          times    - list of times
+    #          t2store  - numpy array containing data from all files    
+    # Modified from read_data_from_file_list to read data from multiple models into a 4-D array
+    # 1. The code now processes model data that completely covers the 20-yr period. Thus,
+    #    all model data must have the same time levels (ntimes). Unlike in the original, ntimes
+    #    is fixed here.
+    # 2. Because one of the model datasets exceeds 240 months (243 months), the model data must be
+    #    truncated to 240 months using the ntimes determined from the first file.
+    ##################################################################################
+    filelist.sort()
+    nfiles = len(filelist)
+    # Crash nicely if 'filelist' is zero length
+    if nfiles == 0:
+        print 'Error: no files have been passed to read_data_from_file_list(): Exit'
+        sys.exit()
+
+    # Open the first file in the list to:
+    #    i)  read in lats, lons
+    #    ii) find out how many timesteps in the file (assume same ntimes in each file in list)
+    #     -allows you to create an empty array to store variable data for all times
+    tmp = netCDF4.Dataset(filelist[0], mode='r', format=file_type)
+    latsraw = tmp.variables[latVarName][:]
+    lonsraw = tmp.variables[lonVarName][:]
+    timesraw = tmp.variables[timeVarName]
+    
+    if(latsraw.ndim == 1):
+        lon, lat = np.meshgrid(lonsraw, latsraw)
+        
+    elif(latsraw.ndim == 2):
+        lon = lonsraw
+        lat = latsraw
+    ntimes = len(timesraw)
+    nygrd = len(lat[:, 0])
+    nxgrd = len(lon[0, :])
+    tmp.close()
+    
+    print 'Lats and lons read in for first file in filelist'
+
+    # Create a single empty masked array to store model data from all files
+    #t2store = ma.zeros((ntimes*nfiles,nygrd,nxgrd))
+    t2store = ma.zeros((nfiles, ntimes, nygrd, nxgrd))
+    #timestore=ma.zeros((ntimes*nfiles)) 
+    
+    ## Now load in the data for real
+    ##  NB. no need to reload in the latitudes and longitudes -assume invariant
+    #timesaccu=0 # a counter for number of times stored so far in t2store 
+    #  NB. this method allows for missing times in data files 
+    #      as no assumption made that same number of times in each file...
+
+    for i, ifile in enumerate(filelist):
+        #print 'Loading data from file: ',filelist[i]
+        f = netCDF4.Dataset(ifile, mode='r')
+        t2raw = ma.array(f.variables[myvar][:])
+        timesraw = f.variables[timeVarName]
+        #ntimes=len(time)
+        #print 'file= ',i,'ntimes= ',ntimes,filelist[i]
+        ## Flatten dimensions which needn't exist, i.e. level 
+        ##   e.g. if for single level then often data have 4 dimensions, when 3 dimensions will do.
+        ##  Code requires data to have dimensions, (time,lat,lon)
+        ##    i.e. remove level dimensions
+        t2tmp = t2raw.squeeze()
+        ## Nb. if data happen to be for a single time, we flattened it by accident; lets put it back... 
+        if t2tmp.ndim == 2:
+            t2tmp = np.expand_dims(t2tmp, 0)
+        #t2store[timesaccu+np.arange(ntimes),:,:]=t2tmp[0:ntimes,:,:]
+        t2store[i, 0:ntimes, :, :] = t2tmp[0:ntimes, :, :]
+        #timestore[timesaccu+np.arange(ntimes)]=time
+        #timesaccu=timesaccu+ntimes
+        f.close()
+
+    print 'Data read in successfully with dimensions: ', t2store.shape
+    
+    # Decode model times into python datetime objects. Note: timestore becomes a list (no more an array) here
+    ifile = filelist[0]
+    timestore, _ = process.getModelTimes(ifile, timeVarName)
+
+    # Make sure latlon grid is monotonically increasing and that the domains
+    # are correct
+    lat, lon, t2store = checkLatLon(lat, lon, t2store)
+    return lat, lon, timestore, t2store
+
+def find_latlon_ranges(filelist, lat_var_name, lon_var_name):
+    # Function to return the latitude and longitude ranges of the data in a file,
+    # given the identifying variable names.
+    #
+    #    Input:
+    #            filelist - list of filenames (data is read in from first file only)
+    #            lat_var_name - variable name of the 'latitude' variable
+    #            lon_var_name - variable name of the 'longitude' variable
+    #
+    #    Output:
+    #            latMin, latMax, lonMin, lonMax - self explanatory
+    #
+    #                    Peter Lean      March 2011
+    
+    filename = filelist[0]
+    
+    try:
+        f = netCDF4.Dataset(filename, mode='r')
+        
+        lats = f.variables[lat_var_name][:]
+        latMin = lats.min()
+        latMax = lats.max()
+        
+        lons = f.variables[lon_var_name][:]
+        lons[lons > 180] = lons[lons > 180] - 360.
+        lonMin = lons.min()
+        lonMax = lons.max()
+        
+        return latMin, latMax, lonMin, lonMax
+
+    except Exception:
+        print 'Error: there was a problem finding the latitude and longitude ranges in the file.'
+        print '       Please check that you specified the filename and variable names correctly.'
+        
+        sys.exit()
+
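+# Example usage of find_latlon_ranges (a sketch only; the file and variable
+# names below are hypothetical placeholders, not part of this module):
+#
+#   >>> latMin, latMax, lonMin, lonMax = find_latlon_ranges(
+#   ...     ['wrf_output_2011.nc'], 'latitude', 'longitude')
+#
+# Only the first file in the list is opened, and longitudes above 180 are
+# folded into [-180, 180) before the min/max are taken.
+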
+def writeBN_lola(fileName, lons, lats):
+    # write a binary data file that includes longitude (1-d) and latitude (1-d) values
+    
+    F = fortranfile.FortranFile(fileName, mode='w')
+    ngrdY = lons.shape[0]
+    ngrdX = lons.shape[1]
+    tmpDat = ma.zeros(ngrdX)
+    tmpDat[:] = lons[0, :]
+    F.writeReals(tmpDat)
+    tmpDat = ma.zeros(ngrdY)
+    tmpDat[:] = lats[:, 0]
+    F.writeReals(tmpDat)
+    # release the temporary array
+    tmpDat = 0
+    F.close()
+
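+# Example usage of writeBN_lola (a sketch; the grid spacing and output file
+# name are hypothetical). Only the first row of lons and the first column of
+# lats are written, one Fortran record each, so a regular grid is assumed:
+#
+#   >>> lon1d = np.arange(-180., 180., 2.5)
+#   >>> lat1d = np.arange(-90., 90.1, 2.5)
+#   >>> lons, lats = np.meshgrid(lon1d, lat1d)
+#   >>> writeBN_lola('grid_lonlat.bin', lons, lats)
+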
+def writeBNdata(fileName, numOBSs, numMDLs, nT, ngrdX, ngrdY, numSubRgn, obsData, mdlData, obsRgnAvg, mdlRgnAvg):
+    # write spatially and regionally regridded data into a binary data file
+    missing = -1.e26
+    F = fortranfile.FortranFile(fileName, mode='w')
+    # construct a data array that replaces the mask flag with the missing value (missing = -1.e26) for printing
+    data = ma.zeros((nT, ngrdY, ngrdX))
+    # allocate the row buffer once: the format allows writing only one row at a time
+    tmpDat = ma.zeros(ngrdX)
+    for m in np.arange(numOBSs):
+        data[:, :, :] = obsData[m, :, :, :]
+        msk = ma.getmaskarray(data)
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                for i in np.arange(ngrdX):
+                    if msk[n, j, i]:
+                        data[n, j, i] = missing
+
+        # write the observed data, one row per record
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                tmpDat[:] = data[n, j, :]
+                F.writeReals(tmpDat)
+
+    # write model data (dep. on the number of models).
+    for m in np.arange(numMDLs):
+        data[:, :, :] = mdlData[m, :, :, :]
+        msk = ma.getmaskarray(data)
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                for i in np.arange(ngrdX):
+                    if msk[n, j, i]:
+                        data[n, j, i] = missing
+
+        for n in np.arange(nT):
+            for j in np.arange(ngrdY):
+                tmpDat[:] = data[n, j, :]
+                F.writeReals(tmpDat)
+
+    data = 0     # release the array allocated for data
+    # write data in subregions
+    if numSubRgn > 0:
+        print 'Also included are the time series of the means over', numSubRgn, 'areas from obs and model data'
+        tmpDat = ma.zeros(nT)
+        for m in np.arange(numOBSs):
+            for n in np.arange(numSubRgn):
+                tmpDat[:] = obsRgnAvg[m, n, :]
+                F.writeReals(tmpDat)
+        for m in np.arange(numMDLs):
+            for n in np.arange(numSubRgn):
+                tmpDat[:] = mdlRgnAvg[m, n, :]
+                F.writeReals(tmpDat)
+    tmpDat = 0     # release the array allocated for tmpDat
+    F.close()
+
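+# Record layout produced by writeBNdata above (derived from the code; each
+# F.writeReals call emits one Fortran record):
+#   1. for each obs dataset:   nT * ngrdY records, each of length ngrdX
+#   2. for each model dataset: nT * ngrdY records, each of length ngrdX
+#   3. if numSubRgn > 0: for each obs and then each model dataset,
+#      numSubRgn records of length nT (sub-region mean time series)
+# Masked grid points are written as the missing value -1.e26.
+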
+def writeNCfile(fileName, numSubRgn, lons, lats, obsData, mdlData, obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions):
+    # write an output file of variables up to 4 dimensions
+    # fileName: the name of the output data file
+    # numSubRgn  : the number of subregions
+    # lons[ngrdY,ngrdX]: longitudes
+    # lats[ngrdY,ngrdX]: latitudes
+    # obsData[numOBSs,nT,ngrdY,ngrdX]: the obs time series over the entire model domain
+    # mdlData[numMDLs,nT,ngrdY,ngrdX]: the mdl time series over the entire model domain
+    # obsRgnAvg[numOBSs,numSubRgn,nT]: the obs time series for all subregions
+    # mdlRgnAvg[numMDLs,numSubRgn,nT]: the mdl time series for all subregions
+    dimO = obsData.shape[0]      # the number of obs data
+    dimM = mdlData.shape[0]      # the number of mdl data
+    dimT = mdlData.shape[1]      # the number of time levels
+    dimY = mdlData.shape[2]      # y-dimension
+    dimX = mdlData.shape[3]      # x-dimension
+    dimR = obsRgnAvg.shape[1]    # the number of subregions
+    f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
+    print mdlRgnAvg.shape, dimM, dimR, dimT
+    #create global attributes
+    f.description = ''
+    # create dimensions
+    print 'Creating Dimensions within the NetCDF Object...'
+    f.createDimension('unity', 1)
+    f.createDimension('time', dimT)
+    f.createDimension('west_east', dimX)
+    f.createDimension('south_north', dimY)
+    f.createDimension('obs', dimO)
+    f.createDimension('models', dimM)
+        
+    # create the variable (real*4) to be written in the file
+    print 'Creating Variables...'
+    f.createVariable('lon', 'd', ('south_north', 'west_east'))
+    f.createVariable('lat', 'd', ('south_north', 'west_east'))
+    f.createVariable('oDat', 'd', ('obs', 'time', 'south_north', 'west_east'))
+    f.createVariable('mDat', 'd', ('models', 'time', 'south_north', 'west_east'))
+    
+    if subRegions:
+        f.createDimension('regions', dimR)
+        f.createVariable('oRgn', 'd', ('obs', 'regions', 'time'))
+        f.createVariable('mRgn', 'd', ('models', 'regions', 'time'))
+        f.variables['oRgn'].varAttName = 'Observation time series: Subregions'
+        f.variables['mRgn'].varAttName = 'Model time series: Subregions'
+
+    loadDataIntoNetCDF(f, obsList, obsData, 'Observation')
+    print 'Loaded the Observations into the NetCDF'
+
+    loadDataIntoNetCDF(f, mdlList, mdlData, 'Model')
+
+    # create attributes and units for the variable
+    print 'Creating Attributes and Units...'
+    f.variables['lon'].varAttName = 'Longitudes'
+    f.variables['lon'].varUnit = 'degrees East'
+    f.variables['lat'].varAttName = 'Latitudes'
+    f.variables['lat'].varUnit = 'degrees North'
+    f.variables['oDat'].varAttName = 'Observation time series: entire domain'
+    f.variables['mDat'].varAttName = 'Model time series: entire domain'
+
+    # assign the values to the variable and write it
+    f.variables['lon'][:] = lons[:]
+    f.variables['lat'][:] = lats[:]
+    if subRegions:
+        f.variables['oRgn'][:] = obsRgnAvg[:]
+        f.variables['mRgn'][:] = mdlRgnAvg[:]
+
+    f.close()
+
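+# Example call of writeNCfile (a sketch; array shapes follow the comments
+# above and all names are hypothetical):
+#
+#   >>> # obsData: (numOBSs, nT, ngrdY, ngrdX); mdlData: (numMDLs, nT, ngrdY, ngrdX)
+#   >>> writeNCfile('evaluation.nc', numSubRgn, lons, lats, obsData, mdlData,
+#   ...             obsRgnAvg, mdlRgnAvg, obsList, mdlList, subRegions)
+#
+# obsList holds display names for the obs datasets, while mdlList holds model
+# file paths (per-dataset variable names are derived from their basenames in
+# loadDataIntoNetCDF below).
+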
+def writeNCfile1(fileName, numDatasets, numObs, numMdl, numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions):
+    # write an output file of variables up to 4 dimensions
+    # fileName: the name of the output data file
+    # numDatasets: the total number of datasets (numObs + numMdl)
+    # numObs     : the number of obs datasets (including the obs ensemble, if it exists)
+    # numMdl     : the number of mdl datasets (including the mdl ensemble, if it exists)
+    # numSubRgn  : the number of subregions
+    # lons[ngrdY,ngrdX]: longitudes
+    # lats[ngrdY,ngrdX]: latitudes
+    # allData[numDatasets,nT,ngrdY,ngrdX]: the combined obs and model time series over the entire model domain
+    # dataRgn[numDatasets,numSubRgn,nT]: the combined obs and model time series for all subregions
+    # dataList   : the names of the datasets
+    # subRegions : sub-region info
+    dimN = allData.shape[0]      # the number of all (obs + model) datasets
+    dimT = allData.shape[1]      # the number of time levels
+    dimY = allData.shape[2]      # y-dimension
+    dimX = allData.shape[3]      # x-dimension
+    dimR = dataRgn.shape[1]      # the number of subregions
+    f = netCDF4.Dataset(fileName, mode='w', format='NETCDF4')
+    print dataRgn.shape, dimN, dimR, dimT
+    #create global attributes
+    f.description = ''
+    # create dimensions
+    print 'Creating Dimensions within the NetCDF Object...'
+    f.createDimension('unity', 1)
+    f.createDimension('time', dimT)
+    f.createDimension('west_east', dimX)
+    f.createDimension('south_north', dimY)
+    f.createDimension('data', dimN)
+    # create the variable (real*4) to be written in the file
+    print 'Creating Variables...'
+    f.createVariable('lon', 'd', ('south_north', 'west_east'))
+    f.createVariable('lat', 'd', ('south_north', 'west_east'))
+    f.createVariable('dGrd', 'd', ('data', 'time', 'south_north', 'west_east'))
+
+    if subRegions:
+        f.createDimension('regions', dimR)
+        f.createVariable('dRgn', 'd', ('data', 'regions', 'time'))
+        f.variables['dRgn'].varAttName = 'Sub-region mean time series'
+
+    # load the gridded data into the output file
+    loadDataIntoNetCDF1(f, dataList, allData)
+    print 'Loaded the gridded data into the NetCDF'
+
+    # create attributes and units for the variable
+    print 'Creating Attributes and Units...'
+    f.variables['lon'].varAttName = 'Longitudes'
+    f.variables['lon'].varUnit = 'degrees East'
+    f.variables['lat'].varAttName = 'Latitudes'
+    f.variables['lat'].varUnit = 'degrees North'
+    f.variables['dGrd'].varAttName = 'Gridded time series'
+
+    # assign the values to the variable and write it
+    f.variables['lon'][:] = lons[:]
+    f.variables['lat'][:] = lats[:]
+    if subRegions:
+        f.variables['dRgn'][:] = dataRgn[:]
+
+    f.close()
+
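+# writeNCfile1 mirrors writeNCfile but expects the obs and model fields to be
+# concatenated along a single 'data' dimension, e.g. (a sketch with
+# hypothetical names):
+#
+#   >>> # allData: (numObs + numMdl, nT, ngrdY, ngrdX)
+#   >>> writeNCfile1('evaluation.nc', numObs + numMdl, numObs, numMdl,
+#   ...              numSubRgn, lons, lats, allData, dataRgn, dataList, subRegions)
+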
+def loadDataIntoNetCDF(fileObject, datasets, dataArray, dataType):
+    """
+    Input::
+        fileObject - netCDF4 file object data will be loaded into
+        datasets - List of dataset names
+        dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
+        dataType - String with value of either 'Model' or 'Observation'
+    Output::
+        No return value.  netCDF4 file object is updated in place
+    """
+    for datasetCount, dataset in enumerate(datasets):
+        if dataType.lower() == 'observation':
+            datasetName = dataset.replace(' ', '')
+        elif dataType.lower() == 'model':
+            datasetName = path.splitext(path.basename(dataset))[0]
+        print "Creating variable %s" % datasetName
+        fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
+        fileObject.variables[datasetName].varAttName = '%s time series: entire domain' % dataType
+        print 'Loading values into %s' % datasetName
+        fileObject.variables[datasetName][:] = dataArray[datasetCount, :, :, :]
+
+def loadDataIntoNetCDF1(fileObject, datasets, dataArray):
+    """
+    Input::
+        fileObject - netCDF4 file object data will be loaded into
+        datasets - List of dataset names
+        dataArray - Multi-dimensional array of data to be loaded into the NetCDF file
+    Output::
+        No return value.  netCDF4 file object is updated in place
+    """
+    for datasetCount, dataset in enumerate(datasets):
+        datasetName = path.splitext(path.basename(dataset))[0]
+        print "Creating variable %s" % datasetName
+        fileObject.createVariable(datasetName, 'd', ('time', 'south_north', 'west_east'))
+        fileObject.variables[datasetName].varAttName = 'Gridded time series: entire domain'
+        print 'Loading values into %s' % datasetName
+        fileObject.variables[datasetName][:] = dataArray[datasetCount,:,:,:]
+
+def checkLatLon(latsin, lonsin, datain):
+    """
+    Purpose::
+        Checks whether latitudes and longitudes are monotonically increasing
+        within the domains [-90, 90) and [-180, 180) respectively, and rearranges the input data
+        accordingly if they are not.
+    
+    Input::
+        latsin - Array of latitudes read from a raw netcdf file
+        lonsin - Array of longitudes read from a raw netcdf file
+        datain  - Array of data values read from a raw netcdf file.
+                   The shape is assumed to be (..., nLat, nLon).
+        
+    Output::
+        latsout - 2D array of (rearranged) latitudes
+        lonsout - 2D array of (rearranged) longitudes
+        dataout - Array of (rearranged) data
+    """
+    # Avoid unnecessary shifting if all lons are higher than 180
+    if lonsin.min() > 180:
+        lonsin -= 360
+        
+    # Make sure lats and lons are monotonically increasing
+    latsDecreasing = np.diff(latsin[:, 0]) < 0
+    lonsDecreasing = np.diff(lonsin[0]) < 0
+    
+    # If all values are decreasing then they just need to be reversed
+    latsReversed, lonsReversed = latsDecreasing.all(), lonsDecreasing.all()
+    
+    # If the lat values are unsorted then raise an exception
+    if not latsReversed and latsDecreasing.any():
+        raise ValueError('Latitudes must be monotonically increasing.')
+    
+    # Perform same checks now for lons
+    if not lonsReversed and lonsDecreasing.any():
+        raise ValueError('Longitudes must be monotonically increasing.')
+    
+    # Also check if lons go from [0, 360), and convert to [-180, 180)
+    # if necessary
+    lonsShifted = lonsin.max() > 180
+    latsout, lonsout, dataout = latsin[:], lonsin[:], datain[:]
+    # Now correct data if latlon grid needs to be shifted    
+    if latsReversed:
+        latsout = latsout[::-1]
+        dataout = dataout[..., ::-1, :]
+        
+    if lonsReversed:
+        lonsout = lonsout[..., ::-1]
+        dataout = dataout[..., ::-1]
+        
+    if lonsShifted:
+        lat1d = latsout[:, 0]
+        dataout, lon1d = shiftgrid(180, dataout, lonsout[0], start=False)
+        lonsout, latsout = np.meshgrid(lon1d, lat1d) 
+        
+    return latsout, lonsout, dataout
+    
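+# A minimal illustration of checkLatLon (hypothetical values): lats arrive
+# north-to-south and lons in [0, 360), and both are corrected along with the
+# data array:
+#
+#   >>> lons, lats = np.meshgrid(np.array([0., 120., 240.]),
+#   ...                          np.array([30., 20., 10.]))
+#   >>> data = np.arange(9.).reshape(1, 3, 3)     # one time level
+#   >>> lats, lons, data = checkLatLon(lats, lons, data)
+#
+# Afterwards lats[:, 0] is increasing and lons[0] lies in [-180, 180).
+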
+def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
+    """
+    Purpose::
+        Shift global lat/lon grid east or west. This function is taken directly
+        from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not
+        currently support arrays with more than two dimensions.
+        https://github.com/matplotlib/basemap
+        
+    Input::
+        lon0 - starting longitude for shifted grid (ending longitude if start=False). 
+               lon0 must be on input grid (within the range of lonsin).
+        datain - original data with longitude the right-most dimension.
+        lonsin - original longitudes.
+        start  - if True, lon0 represents the starting longitude of the new grid. 
+                 if False, lon0 is the ending longitude. Default True.
+        cyclic - width of periodic domain (default 360)
+
+    Output:: 
+        dataout - data on shifted grid
+        lonsout - lons on shifted grid
+    """
+    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
+        # The grid does not include a cyclic point: instead of raising
+        # ValueError('cyclic point not included'), use all of the data
+        start_idx = 0
+    else:
+        # The grid is cyclic: skip the duplicate end point when wrapping
+        start_idx = 1
+    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
+        raise ValueError('lon0 outside of range of lonsin')
+    i0 = np.argmin(np.fabs(lonsin-lon0))
+    i0_shift = len(lonsin)-i0
+    if ma.isMA(datain):
+        dataout = ma.zeros(datain.shape, datain.dtype)
+    else:
+        dataout = np.zeros(datain.shape, datain.dtype)
+    if ma.isMA(lonsin):
+        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
+    else:
+        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
+    if start:
+        lonsout[0:i0_shift] = lonsin[i0:]
+    else:
+        lonsout[0:i0_shift] = lonsin[i0:]-cyclic
+    dataout[...,0:i0_shift] = datain[...,i0:]
+    if start:
+        lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]+cyclic
+    else:
+        lonsout[i0_shift:] = lonsin[start_idx:i0+start_idx]
+    dataout[...,i0_shift:] = datain[...,start_idx:i0+start_idx]
+    return dataout,lonsout
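+
+# A minimal sketch of shiftgrid on a synthetic [0, 360) grid (hypothetical
+# values, for illustration only):
+#
+#   >>> lons = np.arange(0., 360., 30.)             # 0, 30, ..., 330
+#   >>> data = np.ones((2, 3, len(lons))) * lons    # lon is the right-most dim
+#   >>> data2, lons2 = shiftgrid(180., data, lons, start=False)
+#   >>> lons2[0], lons2[-1]
+#   (-180.0, 150.0)
+#
+# With start=False the returned longitudes run up to lon0, i.e. the grid is
+# remapped onto [-180, 180), and the data columns are rotated to match.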

Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/files.py
------------------------------------------------------------------------------
    svn:executable = *


