Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/bottlemet.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/bottlemet.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/bottlemet.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/bottlemet.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,599 @@ +''' + Module to create a web service for RCMET statistical metrics +''' + + + + +########################################################################################## +#setting up bottle and importing metrics +########################################################################################## + +#sets up bottle and necessary methods. +from bottle import route, run, post, request, redirect, debug + +#temporary quick-fix to track errors, not for publication +debug(True) + +#imports pickle +import pickle + +#imports metrics from RCMET +import rcmes.metrics as mtx + + + + +########################################################################################## +#error-catching dictionaries +########################################################################################## + +#dictionary of MetricNames and their number of variables. Useful later on +HowManyVariables={ + "calc_stdev" :1, + "calc_annual_cycle_means" : 2, + "calc_annual_cycle_std" : 2, + "calc_annual_cycle_domain_means" : 2, + "calc_annual_cycle_domain_std" : 2, + "calc_bias" : 2, + "calc_bias_dom" : 2, + "calc_difference" : 2, + "calc_mae" : 2, + "calc_mae_dom" :2, + "calc_rms" : 2, + "calc_rms_dom" : 2, + "calc_temporal_pat_cor" : 2, + "calc_pat_cor" : 2, + "calc_nash_sutcliff" : 2, + "calc_pdf" : 2, + "calc_anom_corn" : 3 +} + +#dictionary of metric names and the names of their variables. 
+NameOfVariables={ + "calc_stdev":['t1'], + "calc_annual_cycle_means" :['data','time'], + "calc_annual_cycle_std" :['data','time'], + "calc_annual_cycle_domain_means" :['data','time'], + "calc_annual_cycle_domain_std" :['data','time'], + "calc_bias" :['t1','t2'], + "calc_bias_dom" :['t1','t2'], + "calc_difference" :['t1','t2'], + "calc_mae" :['t1','t2'], + "calc_mae_dom" : ['t1','t2'], + "calc_rms" :['t1','t2'], + "calc_rms_dom" :['t1','t2'], + "calc_temporal_pat_cor" :['t1','t2'], + "calc_pat_cor" :['t1','t2'], + "calc_nash_sutcliff" :['t1','t2'], + "calc_pdf" :['t1','t2'], + "calc_anom_corn" :['t1','t2','t4'] +} + +#two lists that will help with user explanation later + +ArrayNames=[] + +ListOfArrays=[] + + + +########################################################################################## + +#Running the metrics through interactive web pages + +########################################################################################## + + + +########################################################################################## +#First parts: introduction and identification of user's needs +########################################################################################## + +#basic first page. Explanation could be more comprehensive +@route('/rcmet/metrics/online') +def ShowPossibleMetrics(): + ''' + Returns a page in html that allows the user to select a metric through links + ''' + return ''' + RCMET Metrics through Bottle + +

Please select the metric you will use.

+ +

Metrics with one variable: + "calc_stdev" to return standard deviation +

+ +

Metrics with two variables: + "calc_annual_cycle_means" to return monthly means + ""calc_annual_cycle_std" to return monthly standard deviation + "calc_annual_cycle_domain_ means" to return monthly + domain means + "calc_annual_cycle_domain_std" to return monthly standard + deviation + "calc_bias" to return mean difference + "calc_bias_dom" to return domain mean difference + "calc_difference" to return difference + "calc_mae" to return mean absolute error + "calc_mae_dom" to return domain mean difference over time + "calc_rms" to return root mean square error + + "calc_rms_dom" to return domain root mean square error + "calc_temporal_pat_cor" to return temporal pattern correlation + "calc_pat_cor" to return pattern correlation + "calc_nash_sutcliff" to return Nash-Sutcliff coefficient of + efficiency + "calc_pdf" to return probability distribution function + +

Metrics with three variables: + "calc_anom_corn" to return anomaly correlation

+ + ''' + +#creates introductory page to explain how to use bottle +@route('/rcmet/metrics/online/') +def VariableSubmission(MetricNameHere): + ''' + Returns a page in html that allows the user to choose between submitting variables on the command line or searching + RCMED + ''' + + global MetricName + + MetricName=MetricNameHere + + if MetricName in HowManyVariables: + return "For metric %s , you need %d variable(s), which will represent: %s" %(MetricName, + HowManyVariables[MetricName], NameOfVariables[MetricName][:]), ''' + +

Will you enter variables (which are arrays) through the command line or + will you search the RCMES Database?

+ command line + RCMED + + ''', + '''Run Methods''' + + else: + return "The metric you entered doesn't exist." + + +########################################################################################## +#getting arrays through the command line +########################################################################################## + +#Tells the user how to send variables from the command line +@route('/rcmet/metrics/online/commandline') +def ArraysFromCommandLine(): + ''' + Explains to the user how to submit a variable through POST on the command line + ''' + if HowManyVariables[MetricName]-count<=0: + print "You have already submitted all the needed variables for this metric." + redirect('/rcmet/metrics/online/calculate') + else: + return "Please use your command line to POST a form with the array. Send either a pickled file or serialized ", + "string. Name the form: array. Include also, a form that describes/names the array. Call this form: name. A ", + "sample would be array=array_here and name=array_name_here. Send the form to: ", + "http://.../rcmet/metrics//commandline. Once the computer receives all variables, you may ", + "move on to the metrics portion of the website. Currently, you have submitted %d variable(s) and need %d ", + "more. The next variable you submit will represent the variable %s in %s" %(count, + (HowManyVariables[MetricName]-count),NameOfVariables[MetricName][count], MetricName) + +#this function gets the array from the command line +@route('/rcmet/metrics/online/commandline', method='POST') +def ReceivingArrays(): + ''' + Uses the POST method to retrieve any arrays sent by the user, and proceed to deserialize them. Also adds each + variable to the appropriate list, and proceeds to offer the user the option to add more variables or else move + on to calculating the value of the metric; + ''' + + try: + BottleMetrics.GetVariablesFromCommandLine() + + return "Variable received as %s. 
Will represent %s" % (ArrayNames[count-1], + NameOfVariables[MetricName][count-1]), "Submit more variables?", + '''Online''', + '''Command Line''', + '''No More Variables''', + '''Run Methods''' + + except pickle.UnpicklingError: + return "This object cannot be unpickled. Send only a file or serialized string.", + '''Re-submit Variable''', + '''Run Methods''' + + +########################################################################################## +#getting variables through RCMED +########################################################################################## + +#explains how to enter information into a dynamic link +@route('/rcmet/metrics/online/rcmed') +def QueryRcmed(): + ''' + Returns a page in html that explains to the user how to search RCMED for the desired arrays, and offers the + user multiple forms in which to enter search parameters + ''' + + #I was unclear what type the dataset ID and paramID were supposed to be. This may need to change + + return "Currently, you have submitted %d variable(s) and need %d more. The next"\ + " variable you submit will represent the variable %s in %s" %(count, + (HowManyVariables[MetricName]-count),NameOfVariables[MetricName][count], MetricName),''' + Query RCMED for Array Data + +

Enter the parameters into the appropriate boxes.

+
+

Dataset ID

+ +

Parameter ID

+ +

latMin, float

+ +

latMax, float

+ +

lonMin, float

+ +

lonMax, float

+ +

startTime, datetime object

+ +

endTime, datetime object

+ +

cachedir, string

+ +

Array Name, string

+ +
+ + ''' + + +@route('/rcmet/metrics/online/rcmed', method='POST') +def GetVariablesFromDatabase(): + ''' + Gets data from forms, searches the database, processes the variables, and prompts the user to submit more. + ''' + BottleMetrics.GetVariablesFromRcmed() + + + return "Submit more variables?",'''Online''', + '''Command Line''', + '''No More Variables''', + '''Run Methods''' + + +########################################################################################## +#running the metrics online +########################################################################################## + +#this function actually runs the metrics +@route('/rcmet/metrics/online/calculate') +def Calculate(MetricName): + ''' + Uses variables from the lists to return the answer for the metric. Also returns a brief description of the metric performed. + ''' + + if HowManyVariables[MetricName]Command Line, + Online''','''Run Methods''' + + else: + return BottleMetrics.ExplainMetric(), str(result), '''Run Methods''', + '''Start Over''' + + + '''Run Methods''' + +@route('/rcmet/metrics/online/methods') +def ChooseMethodOnline(): + ''' + Allows an online user to access any method in the class + ''' + + return "Which method?", ''' + Status + ExplainMetric + VariableCount + ReturnResult + CleanUp''' + +@route('/rcmet/metrics/online/methods/) +def RunMethodOnline(MethodName): + ''' + Runs any method in class MetricWebService() chosen by an online user + ''' + + MetricWebServiceMethod=getattr(BottleMetrics, MethodName) + + return BottleMetrics.MetricWebServiceMethod(), '''Back to Beginning''' + + +########################################################################################## +########################################################################################## + +#creating a class for the Web Service + +########################################################################################## 
##########################################################################################

class MetricWebService(object):
    '''
    Collects the arrays a metric needs, runs the metric, and reports progress.

    State lives in module-level globals rather than on the instance: ``count`` (number of
    arrays received so far), ``MetricName`` (the metric currently selected), ``ArrayNames``
    and ``ListOfArrays`` (labels and values of the submitted arrays) and ``result`` (the
    value produced by the last metric run).
    '''

    def __init__(self):
        global count
        count = 0

##########################################################################################

    def Status(self):
        '''
        Prints and returns a summary of how many variables the current metric needs, how many
        have been submitted, and what the submitted values will represent.
        '''
        # The original applied "%" on a separate line with no continuation, which does not
        # parse; the message is now built once and used for both the print and the return.
        message = ("For metric %s , you need %d variable(s): %s. Currently, you have submitted "
                   "%d variable(s) and need %d more. The values you have submitted, %s, will "
                   "represent %s respectively."
                   % (MetricName, HowManyVariables[MetricName], NameOfVariables[MetricName][:],
                      count, (HowManyVariables[MetricName] - count), ArrayNames[:],
                      NameOfVariables[MetricName][:]))
        print(message)
        return message

##########################################################################################

    def ExplainMetric(self):
        '''
        Prints and returns the docstring of the currently selected metric function.
        '''
        # The metrics module is imported as "mtx"; the original looked it up as "mt".
        method = getattr(mtx, MetricName)
        print(method.__doc__)
        return method.__doc__

##########################################################################################

    def VariableCount(self):
        '''
        Compares the number of submitted variables with the number the metric needs.

        Returns a message when too few or too many variables have been submitted; when the
        number is exactly right, runs RunMetrics() and returns None.
        '''
        remaining = HowManyVariables[MetricName] - count

        if remaining > 0:
            print("Please add more variables")
            return "Please add more variables"

        if remaining < 0:
            # The original returned the "add more" text here, contradicting what it printed.
            print("You have added too many variables")
            return "You have added too many variables"

        print("You have added all necessary metrics. The program will now run your metric.")
        self.RunMetrics()

##########################################################################################

    def ProcessVariables(self, array, ArrayName):
        '''
        Appends the submitted array and its name to the module-level lists, increments count,
        and then calls VariableCount().
        '''
        global count

        ListOfArrays.append(array)
        ArrayNames.append(ArrayName)
        count = count + 1

        print("Variable received as %s. Will represent %s"
              % (ArrayName, NameOfVariables[MetricName][count - 1]))

        self.VariableCount()

##########################################################################################

    def GetVariablesFromCommandLine(self):
        '''
        Reads the forms "array" and "name" from the current request, unpickles the array, and
        passes both to ProcessVariables(). If no more variables are needed, only
        VariableCount() is called.

        pickle.UnpicklingError from malformed input propagates to the caller.
        '''
        if HowManyVariables[MetricName] - count <= 0:
            self.VariableCount()
            return

        array = request.forms.get('array')
        ArrayName = request.forms.get('name')

        # SECURITY NOTE(review): unpickling request data executes arbitrary code chosen by
        # the sender. Left in place because it is the service's wire format, but this must
        # not be exposed to untrusted clients.
        if isinstance(array, str):
            array = pickle.loads(array)
        else:
            array = pickle.load(array)

        self.ProcessVariables(array, ArrayName)

##########################################################################################

    def GetVariablesFromRcmed(self):
        '''
        Reads the search parameters from the request forms, queries RCMED through
        rcmes.db.extract_data_from_db, and passes the outcome to ProcessVariables(). If no
        more variables are needed, only VariableCount() is called.
        '''
        if HowManyVariables[MetricName] - count <= 0:
            self.VariableCount()
            return

        import rcmes.db as db

        datasetID = request.forms.get('datasetID')
        paramID = request.forms.get('paramID')
        latMin = request.forms.get('latMin')
        latMax = request.forms.get('latMax')
        lonMin = request.forms.get('lonMin')
        lonMax = request.forms.get('lonMax')
        startTime = request.forms.get('startTime')
        endTime = request.forms.get('endTime')
        cachedir = request.forms.get('cachedir')

        ArrayName = request.forms.get('name')

        try:
            # The original discarded the return value and then read an undefined name
            # ("mdata"), which always raised NameError.
            # NOTE(review): the structure returned by extract_data_from_db is not visible
            # from this file; if it returns several values, select the data array here
            # before handing it on -- TODO confirm against rcmes.db.
            array = db.extract_data_from_db(datasetID, paramID, latMin, latMax, lonMin, lonMax,
                                            startTime, endTime, cachedir)

            self.ProcessVariables(array, ArrayName)

        except TypeError:
            print("One of your variables was not entered in the correct format or was not entered at all")

##########################################################################################

    def GetVariables(self):
        '''
        Registers two POST routes whose handlers forward to GetVariablesFromCommandLine() and
        GetVariablesFromRcmed() on this instance.
        '''
####################

        @route('/rcmet/metrics/get/variables/commandline', method='POST')
        def VariablesPostedToCommandline():
            '''
            Handles forms posted to the URL by running GetVariablesFromCommandLine().
            '''
            try:
                self.GetVariablesFromCommandLine()

            except pickle.UnpicklingError:
                print("This object cannot be unpickled. Send only a file or serialized string.")

####################

        # Bottle calls a route callback with URL wildcards only, so the handler must not
        # declare a "self" parameter; it uses the enclosing method's self instead.
        @route('/rcmet/metrics/get/variables/rcmed', method='POST')
        def GetVariablesFromRCMED():
            '''
            Handles forms posted to the URL by running GetVariablesFromRcmed().
            '''
            self.GetVariablesFromRcmed()

##########################################################################################

    def RunMetrics(self):
        '''
        Looks up the selected metric in the metrics module and calls it with the submitted
        arrays, storing the outcome in the module-level ``result``.
        '''
        global result

        print("Running metric")

        method = getattr(mtx, MetricName)
        needed = HowManyVariables[MetricName]

        if len(ListOfArrays) < needed:
            print("Please add more variables")
            return

        # Metrics take one, two or three positional arrays, in submission order.
        if 1 <= needed <= 3:
            result = method(*ListOfArrays[:needed])

##########################################################################################

    # staticmethod lets the same function serve both as the route callback (called with no
    # arguments) and as BottleMetrics.ReturnResult() from the method-dispatch routes.
    @staticmethod
    @route('/rcmet/metrics/commandline/return/result')
    def ReturnResult():
        '''
        Returns the last computed result as a string at a fixed URL.
        '''
        #If the result of the metrics is an array, I would recommend including a provision in
        #ReturnResult() that pickles or somehow serializes the result, and then, upon getting
        #the pickled string from the URL, the user could unpickle it and use it as an array.

        return str(result)

##########################################################################################

    def CleanUp(self, name=None):
        '''
        Empties the variable lists, sets count to zero and sets MetricName to ``name``
        (None by default), so a user can start over without re-creating the instance.
        '''
        global ArrayNames, ListOfArrays, count, MetricName

        ArrayNames = []
        ListOfArrays = []
        count = 0

        # The original assigned the global to the parameter, leaving MetricName unchanged.
        MetricName = name

##########################################################################################
#final commands to tie everything together
##########################################################################################

#allows the command line user to remotely create an instance of the class
@route('/rcmet/metrics/commandline', method='POST')
def CreateAnInstance():
    '''
    Handles a POST naming the metric a command-line user wants (form "NameOfMetric"). Stores
    the name in the module-level MetricName and, if the metric is known, registers the
    variable-submission routes through BottleMetrics.GetVariables().
    '''
    global MetricName

    NameOfMetric = request.forms.get('NameOfMetric')
    MetricName = NameOfMetric

    # The original tested an undefined variable ("name") here.
    if NameOfMetric in HowManyVariables:
        BottleMetrics.GetVariables()

    else:
        print("The metric you entered, %s, does not exist" % NameOfMetric)



@route('/rcmet/metrics/commandline/methods', method='POST')
def RunAMethod():
    '''
    Calls the method of BottleMetrics named in the posted form "MethodName".
    '''
    MethodName = request.forms.get('MethodName')

    # NOTE(review): any attribute name the client sends is looked up; consider restricting
    # this to an explicit list of allowed methods.
    MetricWebServiceMethod = getattr(BottleMetrics, MethodName)

    # Call the bound method that was looked up; the original tried to call an attribute
    # literally named "MetricWebServiceMethod" on the instance.
    MetricWebServiceMethod()


BottleMetrics = MetricWebService()

#final function starts up bottle at http://localhost:8080
#note: localhost:8080 may need to be changed eventually
run(host='localhost', port=8080)



# Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/bottlemet.py
# ------------------------------------------------------------------------------
#     svn:executable = *
#
# Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/dataset_helpers.py
# URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/dataset_helpers.py?rev=1537520&view=auto
# ==============================================================================
# --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/dataset_helpers.py (added)
# +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/dataset_helpers.py Thu Oct 31 14:59:48 2013
# @@ -0,0 +1,32 @@
#!/usr/bin/env python
"""
    Provides Bottle services for interacting with RCMED
"""

from bottle import request, route

import requests
import Nio

@route('/getObsDatasets')
def getObservationDatasetData():
    """
    Fetches the dataset listing from the RCMED query API and returns its body text,
    wrapped in the callback name when a "callback" query parameter is present (JSONP).
    """
    r = requests.get('http://rcmes.jpl.nasa.gov/query-api/datasets.php')

    # Handle JSONP requests
    if request.query.callback:
        return "%s(%s)" % (request.query.callback, r.text)

    # Otherwise, just return JSON
    return r.text

+@route('/getDatasetParam') +def getDatasetParameters(): + url = 'http://rcmes.jpl.nasa.gov/query-api/parameters.php?dataset=' + request.query.dataset + r = requests.get(url) + + # Handle JSONP requests + if (request.query.callback): + return "%s(%s)" % (request.query.callback, r.text) + # Otherwise, just return JSON + else: + return r.text Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/dataset_helpers.py ------------------------------------------------------------------------------ svn:executable = * Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/decode_model_times.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/decode_model_times.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/decode_model_times.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/decode_model_times.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,185 @@ +#!/usr/local/bin/python +"""Module to simplify handling of model times """ +def decodeTimeFromString(time_string): + ''' + # Decodes string into a python datetime object + # Method: tries a bunch of different time format possibilities and hopefully one of them will hit. 
+ # + # Input: time_string - a string that represents a date/time + # Output: mytime - a python datetime object + # + # Peter Lean February 2011 + ''' + import time + import datetime + + try: + mytime = time.strptime(time_string, '%Y-%m-%d %H:%M:%S') + mytime = datetime.datetime(*mytime[0:6]) + return mytime + + except ValueError: + pass + + try: + mytime = time.strptime(time_string, '%Y/%m/%d %H:%M:%S') + mytime = datetime.datetime(*mytime[0:6]) + return mytime + + except ValueError: + pass + + try: + mytime = time.strptime(time_string, '%Y%m%d %H:%M:%S') + mytime = datetime.datetime(*mytime[0:6]) + return mytime + + except ValueError: + pass + + try: + mytime = time.strptime(time_string, '%Y:%m:%d %H:%M:%S') + mytime = datetime.datetime(*mytime[0:6]) + return mytime + + except ValueError: + pass + + try: + mytime = time.strptime(time_string, '%Y%m%d%H%M%S') + mytime = datetime.datetime(*mytime[0:6]) + return mytime + + except ValueError: + pass + + try: + mytime = time.strptime(time_string, '%Y-%m-%d %H:%M') + mytime = datetime.datetime(*mytime[0:6]) + return mytime + + except ValueError: + pass + + + print 'Error decoding time string: string does not match a predefined time format' + return 0 + + + +def decode_model_times(filelist,timeVarName): + ''' + # Routine to convert from model times ('hours since 1900...', 'days since ...') + # into a python datetime structure + # + # Input: + # filelist - list of model files + # timeVarName - name of the time variable in the model files + # + # Output: + # times - list of python datetime objects describing model data times + # + # + # Peter Lean February 2011 + # + ''' + import datetime + import re + import string + import math + import numpy + import Nio + + f = Nio.open_file(filelist[0]) + xtimes = f.variables[timeVarName] + timeFormat = xtimes.units + + # search to check if 'since' appears in units + try: + sinceLoc = re.search('since',timeFormat).end() + + except: + print 'Error decoding model times: time variable 
attributes do not contain "since"' + return 0 + + # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units + units = '' + try: + mysearch = re.search('minutes',timeFormat).end() + units = 'minutes' + except: + pass + try: + mysearch = re.search('hours',timeFormat).end() + units = 'hours' + except: + pass + try: + mysearch = re.search('days',timeFormat).end() + units = 'days' + except: + pass + try: + mysearch = re.search('months',timeFormat).end() + units = 'months' + except: + pass + try: + mysearch = re.search('years',timeFormat).end() + units = 'years' + except: + pass + + # cut out base time (the bit following 'since') + base_time_string = string.lstrip(timeFormat[sinceLoc:]) + + # decode base time + base_time = decodeTimeFromString(base_time_string) + + + times=[] + for xtime in xtimes[:]: + if(units=='minutes'): + dt = datetime.timedelta(minutes=xtime) + new_time = base_time + dt + + if(units=='hours'): + dt = datetime.timedelta(hours=xtime) + new_time = base_time + dt + + if(units=='days'): + dt = datetime.timedelta(days=xtime) + new_time = base_time + dt + + if(units=='months'): # NB. adding months in python is complicated as month length varies and hence ambigous. + # Perform date arithmatic manually + # Assumption: the base_date will usually be the first of the month + # NB. this method will fail if the base time is on the 29th or higher day of month + # -as can't have, e.g. Feb 31st. 
+ new_month = int(base_time.month + xtime % 12) + new_year = int(math.floor(base_time.year + xtime / 12.)) + new_time = datetime.datetime(new_year,new_month,base_time.day,base_time.hour,base_time.second,0) + + if(units=='years'): + dt = datetime.timedelta(years=xtime) + new_time = base_time + dt + + times.append(new_time) + + return times + + +''' NOT USED BY BOTTLE WS CALLS +import sys +import datetime +filename = [sys.argv[1]] +time_var_name = sys.argv[2] + +print filename, type(filename) +print time_var_name + +times = decode_model_times(filename,time_var_name) + +for time in times: + print time.strftime('%Y-%m-%d %H:%M:%S') +''' Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/decode_model_times.py ------------------------------------------------------------------------------ svn:executable = * Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/directory_helpers.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/directory_helpers.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/directory_helpers.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/directory_helpers.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,42 @@ +#!/usr/bin/env python +""" + Provides helpers for listing retrieving directory information from the server. 
+""" + +from bottle import request, route +import os +import json + +PATH_LEADER = "/usr/local/rcmes" + +@route('/getDirInfo/') +def getDirectoryInfo(dirPath): + dirPath = PATH_LEADER + dirPath + dirPath = dirPath.replace('/../', '/') + dirPath = dirPath.replace('/./', '/') + + if os.path.isdir(dirPath): + listing = os.listdir(dirPath) + listingNoHidden = [f for f in listing if f[0] != '.'] + joinedPaths = [os.path.join(dirPath, f) for f in listingNoHidden] + joinedPaths = [f + "/" if os.path.isdir(f) else f for f in joinedPaths] + finalPaths = [p.replace(PATH_LEADER, '') for p in joinedPaths] + sorted(finalPaths, key=lambda s: s.lower()) + returnJSON = finalPaths + else: + returnJSON = [] + + returnJSON = json.dumps(returnJSON) + if request.query.callback: + return "%s(%s)" % (request.query.callback, returnJSON) + else: + return returnJSON + +@route('/getPathLeader/') +def getPathLeader(): + returnJSON = {"leader": PATH_LEADER} + + if request.query.callback: + return "%s(%s)" % (request.query.callback, returnJSON) + else: + return returnJSON Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/directory_helpers.py ------------------------------------------------------------------------------ svn:executable = * Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_latlon_var.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_latlon_var.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_latlon_var.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_latlon_var.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,136 @@ +#!/usr/local/bin/python +""" + Small command line utility to find what the latitude and longitude variables are called in a model file. 
+ + Background:: + Model output files tend not to follow any defined standard in terms of + variable naming conventions. One model may call the latitude "lat", + another one may call it "Latitudes". This script looks for the + existence of any of a predefined list of synonyms for lat and long. + + This script should be run from the command line (i.e. not called from + within python) + + Input:: + -filename + + Output:: + -success flag (1 or 0): were both latitude and longitude variable names found in the file? + + if successful:: + -name of latitude variable + -name of longitude variable + -latMin -descriptions of lat/lon ranges in data files + -latMax + -lonMin + -lonMax + + if unsuccessful: + -list of variable names in file + + (NB. all printed to standar output) + +""" + +import sys +import Nio +import bottle +from bottle import request +import json + +#filename = sys.argv[1] + +@bottle.route('/list/latlon/:filename#".*"#') +def find_latlon(filename): + success = 0 + filename = filename.strip('"') + f = Nio.open_file(filename) + var_name_list = f.variables.keys() + + # convert all variable names into lower case + var_name_list_lower = [x.lower() for x in var_name_list] + + # create a "set" from this list of names + varset = set(var_name_list_lower) + + # Use "set" types for finding common variable name from in the file and from the list of possibilities + lat_possible_names = set(['latitude','lat','lats','latitudes']) + lon_possible_names = set(['longitude','lon','lons','longitudes']) + + # Search for common latitude name variants: + # Find the intersection of two sets, i.e. find what latitude is called in this file. 
+ + try: + print 'hello from inside try block' + lat_var_name = list(varset & lat_possible_names)[0] + successlat = 1 + index = 0 + for i in var_name_list_lower: + if i==lat_var_name: + whlat = index + index += 1 + latname = var_name_list[whlat] + + lats = f.variables[latname][:] + latMin = lats.min() + latMax = lats.max() + + except: + print 'exception happens' + latname = 'not_found' + successlat = 0 + + # Search for common longitude name variants: + # Find the intersection of two sets, i.e. find what longitude + # is called in this file. + try: + lon_var_name = list(varset & lon_possible_names)[0] + successlon = 1 + index = 0 + for i in var_name_list_lower: + if i==lon_var_name: + whlon = index + index += 1 + lonname = var_name_list[whlon] + + lons = f.variables[lonname][:] + #this will correct all lons to -180 , 180 + lons[lons>180]=lons[lons>180]-360 + + lonMin = lons.min() + lonMax = lons.max() + + except: + lonname = 'not_found' + successlon = 0 + + + if(successlat & successlon): + success = 1 + + + if success: + print success, latname, lonname, latMin, latMax, lonMin, lonMax + val_types= [int,str,str,str,str,str,str] + success_values = [success, latname, lonname, latMin, latMax, lonMin, lonMax] + value_names = ['success','latname','lonname','latMin','latMax','lonMin','lonMax'] + values = [vtypes(svalues) for vtypes,svalues in zip(val_types,success_values)] + print values + output = dict(zip(value_names,values)) + #json_output = json.dumps({'success':success,'latname':latname, \ + # 'lonname':lonname,'latMin':latMin, \ + # 'latMax':latMax,'lonMin':lonMin, \ + # 'lonMax':lonMax }, sort_keys=True, indent=4) + if (request.query.callback): + return "%s(%s)" % (request.query.callback, output) + return output + + if success==0: + json_output = json.dumps({'success':success, + 'variables':var_name_list }, \ + sort_keys=True, indent=4) + if (request.query.callback): + return "%s(%s)" % (request.query.callback, json_output) + return json_output + #print success, 
var_name_list + Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_latlon_var.py ------------------------------------------------------------------------------ svn:executable = * Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_time_var.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_time_var.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_time_var.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/services/find_time_var.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,87 @@ +#!/usr/local/bin/python +""" + Small command line utility to find what the time variable is called in a model file. + + Background:: + Model output files tend not to follow any defined standard in terms of + variable naming conventions. One model may call the time "time", + another one may call it "t". This script looks for the existence of + any of a predefined list of synonyms for time. + + This script should be run from the command line (i.e. not called from within python) + + Input:: + -filename + + Output:: + -success flag (1 or 0): was a time variable name found in the file? + + if successful: + -name of time variable + -(TODO) modelStartTime -descriptions of time ranges in data files + -(TODO) modelEndTime + if unsuccessful: + -list of variable names in file + + (NB. 
@bottle.route('/list/time/:filename#".*"#')
def list_time(filename):
    """Find the time variable of a model file and report its time range.

    Variable names in the file are compared case-insensitively against a
    fixed list of common spellings for the time axis.

    Input::
        filename - path of the model file; surrounding double quotes are
                   stripped

    Output::
        JSON string.  When a time variable is found it holds 'success' (1),
        'timename', 'start_time' and 'end_time' (the smallest and largest
        values returned by dmt.decode_model_times, as strings).  Otherwise
        it holds 'success' (0) and 'variables', the list of variable names
        present in the file.

        When the request has a ``callback`` query parameter the JSON
        document is returned wrapped as ``callback(<json>)``.
    """
    time_possible_names = ('time', 't', 'times', 'date', 'dates', 'julian')

    filename = filename.strip('"')
    f = Nio.open_file(filename)
    try:
        var_name_list = list(f.variables.keys())
    finally:
        # Only the variable names are needed from this handle.
        f.close()

    # Lower-cased name -> spelling used in the file.
    names_by_lower = dict((name.lower(), name) for name in var_name_list)
    timename = None
    for candidate in time_possible_names:
        if candidate in names_by_lower:
            timename = names_by_lower[candidate]
            break

    if timename is not None:
        print('timename is ' + timename)
        times = dmt.decode_model_times([filename], timename)
        json_output = json.dumps({'success': 1,
                                  'timename': timename,
                                  'start_time': str(min(times)),
                                  'end_time': str(max(times))})
    else:
        json_output = json.dumps({'success': 0, 'variables': var_name_list})

    # NOTE(review): the callback name is echoed back unescaped; confirm the
    # service is only reachable by trusted clients.
    callback = request.query.callback
    if callback:
        return "%s(%s)" % (callback, json_output)
    return json_output
@bottle.route('/list/vars/:filename#".*"#')
def list_vars(filename):
    """List the variables contained in a model file.

    Input::
        filename - full path of the model file; surrounding double quotes
                   are stripped

    Output::
        JSON string ``{"variables": [...]}`` when the file can be opened,
        ``{"FAIL": "<filename>"}`` when it cannot.

        When the request has a ``callback`` query parameter the JSON
        document is returned wrapped as ``callback(<json>)``.
    """
    filename = filename.strip('"')
    print(filename + ' is filename variable')

    try:
        f = Nio.open_file(filename)
    except Exception as err:
        print('Error_reading_file ' + filename + ': ' + str(err))
        # json.dumps escapes quotes and backslashes in the filename, which
        # plain string concatenation did not.
        body = json.dumps({'FAIL': filename})
    else:
        try:
            body = json.dumps({'variables': list(f.variables.keys())},
                              sort_keys=True, indent=2)
        finally:
            f.close()

    # NOTE(review): the callback name is echoed back unescaped; confirm the
    # service is only reachable by trusted clients.
    callback = request.query.callback
    if callback:
        return "%s(%s)" % (callback, body)
    return body
@route('/')
@route('/index.html')
def index():
    """Serve the landing page text for '/' and '/index.html'."""
    return "Go to Hello World page"


@route('/hello')
def hello():
    """Serve a fixed greeting; useful as a liveness check."""
    return "Hello World!"


@route('/api/status')
def api_status():
    """Return a small status dictionary."""
    return {'status': 'online', 'key': 'value'}


# The route needs a wildcard so that bottle has a value to pass as
# `filename`; without it every request to this handler raises TypeError.
@route('/static/evalResults/<filename>')
def get_eval_result_image(filename):
    """Serve the file named `filename` from the /tmp/rcmet directory."""
    return static_file(filename, root="/tmp/rcmet")


@hook('after_request')
def enable_cors():
    """Add a header allowing cross-origin requests from any origin."""
    response.headers['Access-Control-Allow-Origin'] = '*'


if __name__ == "__main__":
    run(host='localhost', port=8082)
def _post_field(name, cast=None):
    """Return POST field `name`, stripped and optionally converted by `cast`.

    The field name and the resulting value are echoed to standard output.
    """
    raw = request.POST.get(name, '').strip()
    value = raw if cast is None else cast(raw)
    print('%s %s' % (name, value))
    return value


def _parse_post_time(text):
    """Convert a 'YYYY-MM-DD HH:MM:SS' string into a datetime object."""
    return datetime.datetime.strptime(str(text), time_format_new)


@route('/rcmes/run/', method='POST')
def rcmes_go():
    """Run an RCMES evaluation described by the POSTed form fields.

    Fields read from the request: obsDatasetId, obsParameterId, startTime,
    endTime, latMin, latMax, lonMin, lonMax, filelist, modelVarName,
    precipFlag, modelTimeVarName, modelLatVarName, modelLonVarName,
    regridOption, timeRegridOption, seasonalCycleOption and metricOption.
    A missing or malformed numeric/date field raises ValueError.

    Output::
        dict with 'modelPath', 'obsPath' and 'comparisonPath': file paths
        inside the work directory built from the plot file name option.
    """
    # Imported here because the module-level bottle import only brings in
    # `route` and `request`.
    from bottle import abort

    print("**********\nBEGIN RCMES2.0_RUN\n**********")
    print('cachedir %s' % cachedir)
    print('workdir %s' % workdir)

    try:
        if not os.path.exists(cachedir):
            os.makedirs(cachedir)
    except OSError as e:
        # Another request may have created the directory in the meantime;
        # only fail when it still does not exist.
        if not os.path.isdir(cachedir):
            message = "I/O error({0}): {1}".format(e.errno, e.strerror)
            print(message)
            abort(500, message)

    obsDatasetId = _post_field('obsDatasetId', int)
    obsParameterId = _post_field('obsParameterId', int)

    startTime = _post_field('startTime', _parse_post_time)
    endTime = _post_field('endTime', _parse_post_time)

    latMin = _post_field('latMin', float)
    latMax = _post_field('latMax', float)
    lonMin = _post_field('lonMin', float)
    lonMax = _post_field('lonMax', float)

    filelist = [_post_field('filelist')]

    modelVarName = _post_field('modelVarName', str)
    # NOTE(review): precipFlag and seasonalCycleOption are forwarded as the
    # raw form strings (e.g. 'False'), not converted -- confirm that
    # do_rcmes expects strings here.
    precipFlag = _post_field('precipFlag')
    modelTimeVarName = _post_field('modelTimeVarName', str)
    modelLatVarName = _post_field('modelLatVarName', str)
    modelLonVarName = _post_field('modelLonVarName', str)

    regridOption = _post_field('regridOption', str)
    timeRegridOption = _post_field('timeRegridOption', str)
    seasonalCycleOption = _post_field('seasonalCycleOption')
    metricOption = _post_field('metricOption', str)

    settings = {"cacheDir": cachedir, "workDir": workdir, "fileList": filelist}
    params = {"obsDatasetId": obsDatasetId, "obsParamId": obsParameterId,
              "startTime": startTime, "endTime": endTime,
              "latMin": latMin, "latMax": latMax,
              "lonMin": lonMin, "lonMax": lonMax}
    model = {"varName": modelVarName, "timeVariable": modelTimeVarName,
             "latVariable": modelLatVarName, "lonVariable": modelLonVarName}
    # NOTE(review): the mask reuses the evaluation bounds rather than the
    # module-level maskLat*/maskLon* defaults -- confirm this is intended.
    mask = {"latMin": latMin, "latMax": latMax,
            "lonMin": lonMin, "lonMax": lonMax}
    options = {"regrid": regridOption, "timeRegrid": timeRegridOption,
               "seasonalCycle": seasonalCycleOption, "metric": metricOption,
               "plotTitle": titleOption, "plotFilename": plotFileNameOption,
               "mask": maskOption, "precip": precipFlag}

    awesome.do_rcmes(settings, params, model, mask, options)

    return {
        'modelPath': os.path.join(workdir, plotFileNameOption + "model.png"),
        'obsPath': os.path.join(workdir, plotFileNameOption + "obs.png"),
        'comparisonPath': os.path.join(workdir, plotFileNameOption + ".png"),
    }
svn:executable = * Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/__init__.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/__init__.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/__init__.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/__init__.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,2 @@ +"""This Package is used to handle various forms of data storage from opening +files to accessing the RCMED and returning data in the form of numpy arrays""" \ No newline at end of file Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/__init__.py ------------------------------------------------------------------------------ svn:executable = * Added: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py?rev=1537520&view=auto ============================================================================== --- incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py (added) +++ incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py Thu Oct 31 14:59:48 2013 @@ -0,0 +1,359 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
def reorderXYT(lons, lats, times, values):
    """Sort values, latitudes and longitudes by time, latitude, longitude.

    The database does not necessarily return points in order, so the arrays
    are re-ordered such that a later reshape puts every point in its place.

    Input::
        lons, lats, times, values - equal-length 1-d sequences
    Output::
        sortedValues, sortedLats, sortedLons - numpy arrays in sorted order
    """
    # np.lexsort uses the LAST key as the primary one: time first, then
    # latitude, then longitude.
    order = np.lexsort((lons, lats, times))
    sortedValues = np.asanyarray(values)[order]
    sortedLats = np.asanyarray(lats)[order]
    sortedLons = np.asanyarray(lons)[order]
    return sortedValues, sortedLats, sortedLons


def findUnique(seq, idfun=None):
    """Return the items of `seq` without duplicates, keeping first-seen order.

    Input::
        seq   - iterable of values
        idfun - optional function mapping an item to the (hashable) key
                used to decide whether it was already seen
    Output::
        result - list holding the first item seen for every distinct key
    """
    if idfun is None:
        def idfun(x):
            return x

    seen = set()
    result = []
    for item in seq:
        marker = idfun(item)
        if marker in seen:
            continue
        seen.add(marker)
        result.append(item)
    return result


def get_param_info(url):
    """Fetch the parameter-table information for the given query URL.

    "&info=yes" is appended to `url` and the JSON response is decoded.

    Output::
        database, timestep, realm, instrument, start_date, end_date, unit
    """
    from contextlib import closing

    with closing(urllib2.urlopen(url + "&info=yes")) as result:
        info = json.loads(result.read())

    return (info["database"], info["timestep"], info["realm"],
            info["instrument"], info["start_date"], info["end_date"],
            info["units"])


def get_data(url):
    """Query the database at `url` and return the datapoints as lists.

    The response is expected to contain a 'data: ' marker line followed by
    one 'latitude,longitude,level,timestamp,value' row per line (CRLF
    separated).

    Output::
        latitudes, longitudes, levels, values - lists of numpy float32
        timestamps - list of strings, left undecoded
    Raises::
        ValueError when the response holds no 'data: ' marker
    """
    from contextlib import closing

    with closing(urllib2.urlopen(url)) as result:
        datastring = result.read()

    marker = re.search('data: \r\n', datastring)
    if marker is None:
        raise ValueError("no 'data: ' section in the response from " + url)

    latitudes = []
    longitudes = []
    levels = []
    values = []
    timestamps = []

    for line in datastring[marker.end():].split('\r\n'):
        # The response ends with an empty row; skip blank rows instead of
        # dropping the last row whatever it contains.
        if not line.strip():
            continue
        row = line.split(',')
        latitudes.append(np.float32(row[0]))
        longitudes.append(np.float32(row[1]))
        levels.append(np.float32(row[2]))
        # Timestamps are strings; they are decoded later.
        timestamps.append(row[3])
        values.append(np.float32(row[4]))

    return latitudes, longitudes, levels, values, timestamps


def create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, startTime, endTime, unit, netCD_fileName):
    """Write the datapoints to a new netCDF file.

    Every input sequence becomes a 1-d double variable ('lat', 'lon', 'lev',
    'time', 'value') over one shared dimension.  Times are stored as hours
    since `startTime`.

    Input::
        latitudes, longitudes, levels, values - equal-length sequences
        timestamps - 'YYYY-MM-DD HH:MM:SS' strings, same length
        database   - parameter name, used in the file description
        latMin, latMax, lonMin, lonMax, endTime - used in the description
        startTime  - datetime used as the base of the time axis
        unit       - units string stored on the 'value' variable
        netCD_fileName - path of the file to create
    """
    timeFormat = "%Y-%m-%d %H:%M:%S"

    netcdf = netCDF4.Dataset(netCD_fileName, mode='w')
    try:
        string = "The netCDF file for parameter: " + database + ", latMin: " + str(latMin) + ", latMax: " + str(latMax) + ", lonMin: " + str(lonMin) + ", lonMax: " + str(lonMax) + " startTime: " + str(startTime) + " and endTime: " + str(endTime) + "."
        netcdf.globalAttName = str(string)
        netcdf.createDimension('dim', len(latitudes))
        latitude = netcdf.createVariable('lat', 'd', ('dim',))
        longitude = netcdf.createVariable('lon', 'd', ('dim',))
        level = netcdf.createVariable('lev', 'd', ('dim',))
        time = netcdf.createVariable('time', 'd', ('dim',))
        value = netcdf.createVariable('value', 'd', ('dim',))

        netcdf.variables['lat'].varAttName = 'latitude'
        netcdf.variables['lat'].units = 'degrees_north'
        netcdf.variables['lon'].varAttName = 'longitude'
        netcdf.variables['lon'].units = 'degrees_east'
        netcdf.variables['time'].varAttName = 'time'
        netcdf.variables['time'].units = 'hours since ' + str(startTime)
        netcdf.variables['value'].varAttName = 'value'
        netcdf.variables['value'].units = str(unit)
        netcdf.variables['lev'].varAttName = 'level'
        netcdf.variables['lev'].units = 'hPa'

        hours = []
        for t in timestamps:
            diff = datetime.strptime(t, timeFormat) - startTime
            # Keep the sub-day part as well: the axis is declared in hours,
            # so truncating to whole days would merge samples of one day.
            hours.append(diff.days * 24 + diff.seconds / 3600.0)

        latitude[:] = latitudes[:]
        longitude[:] = longitudes[:]
        level[:] = levels[:]
        time[:] = hours[:]
        value[:] = values[:]
    finally:
        netcdf.close()


def read_netcdf(netCD_fileName):
    """Read a netCDF file that has the layout written by create_netCDF.

    The 'time' variable (hours since the base date named in its units
    attribute) is converted back to 'YYYY-MM-DD HH:MM:SS' strings.

    Output::
        latitudes, longitudes, levels - arrays read from the file
        times  - list of timestamp strings
        values - numpy masked array of the 'value' variable
    """
    timeFormat = "%Y-%m-%d %H:%M:%S"

    netcdf = netCDF4.Dataset(netCD_fileName, mode='r')
    try:
        latitudes = netcdf.variables['lat'][:]
        longitudes = netcdf.variables['lon'][:]
        levels = netcdf.variables['lev'][:]
        hours = netcdf.variables['time'][:]
        values = ma.array(netcdf.variables['value'][:])
        # Units look like "hours since YYYY-MM-DD HH:MM:SS".
        time_unit = str(netcdf.variables['time'].units).split(' ')
    finally:
        netcdf.close()

    base_date = datetime.strptime(time_unit[2] + " " + time_unit[3], timeFormat)

    times = []
    for hour in hours:
        eachTime = base_date + timedelta(hours=float(hour))
        # Formatted by hand, keeping the year un-padded as before.
        times.append("%d-%02d-%02d %02d:%02d:%02d" % (
            eachTime.year, eachTime.month, eachTime.day,
            eachTime.hour, eachTime.minute, eachTime.second))

    return latitudes, longitudes, levels, times, values


def improve_data(latitudes, longitudes, levels, times, values, timestep):
    """Turn flat datapoint arrays into gridded arrays with missing data masked.

    Input::
        latitudes, longitudes, levels, values - equal-length 1-d arrays
        times    - 'YYYY-MM-DD HH:MM:SS' strings, same length
        timestep - passed to process.normalizeDatetimes
    Output::
        latitudes, longitudes - grids taken from the first time step (and
                                first level) when there is a single level
        uniqueLevels - 1-d array of the distinct level values
        timesUnique  - sorted datetimes after process.normalizeDatetimes
        mdata        - masked array of values; entries equal to -9999 are
                       masked
    """
    uniqueLevels = np.unique(levels)
    uniqueTimestamps = np.unique(times)

    # Target grid: (time, latitude, longitude, level).
    shape = (len(uniqueTimestamps), len(np.unique(latitudes)),
             len(np.unique(longitudes)), len(uniqueLevels))

    # NOTE(review): the sort keys are time, latitude and longitude only, so
    # with more than one level the order along the last axis is whatever
    # order the input had -- confirm multi-level data is not used.
    values, latitudes, longitudes = reorderXYT(longitudes, latitudes, times, values)

    timeFormat = "%Y-%m-%d %H:%M:%S"
    timesUnique = sorted(datetime.strptime(t, timeFormat) for t in uniqueTimestamps)
    timesUnique = process.normalizeDatetimes(timesUnique, timestep)

    latitudes = latitudes.reshape(shape)
    longitudes = longitudes.reshape(shape)
    values = values.reshape(shape)

    # Drop the level axis when there is only a single level.
    if shape[3] == 1:
        values = values[:, :, :, 0]
        latitudes = latitudes[0, :, :, 0]
        longitudes = longitudes[0, :, :, 0]

    # Masking makes values.mean(), values.max() etc. ignore missing values.
    mdi = -9999  # TODO: extract this value from the DB retrieval metadata
    mdata = ma.masked_array(values, mask=(values == mdi))

    return latitudes, longitudes, uniqueLevels, timesUnique, mdata


def extractData(datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep):
    """Extract data from the DB into numpy masked arrays, caching it per month.

    Data is fetched one calendar month at a time.  Each month is stored as
    a netCDF file below `cachedir` and read back from there; months whose
    file already exists are not downloaded again.

    Input::
        datasetID, paramID: required identifiers of data in database
        latMin, latMax, lonMin, lonMax: location range to extract data for
        userStartTime, userEndTime: python datetime objects describing the
            required time range; whole months from the month of
            userStartTime onwards are extracted
        cachedir: directory path used to store the cache files
        timestep: "daily" | "monthly", used to build the first query; the
            timestep reported by the parameter table is used afterwards
    Output::
        latitudes, longitudes, uniqueLevels, timesUnique, mdata as returned
        by improve_data
    """
    url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep)

    # Parameter information from the parameter table.
    database, timestep, realm, instrument, dbStartDate, dbEndDate, unit = get_param_info(url)

    variable = database
    project = "RCMES"
    # activity / product / realm / variable / frequency / data structure /
    # institution / project / instrument / version
    path = os.path.join(cachedir, "obs4cmip5", "observations", realm,
                        variable, timestep, "grid", "NASA", project,
                        instrument, "v1")
    try:
        os.makedirs(path)
    except OSError:
        # Already existing is fine; anything else is a real error.
        if not os.path.isdir(path):
            raise

    processing_level = 'L3'
    processing_version = "processing_version"  # still unknown, can be added later

    # One [first day, last day] pair (both at 00:00:00) per month, starting
    # with the month of userStartTime.
    date_list = []
    cursor = userStartTime
    while cursor <= userEndTime:
        lastDay = monthrange(cursor.year, cursor.month)[1]
        beginningOfMonth = datetime(cursor.year, cursor.month, 1)
        endOfMonth = datetime(cursor.year, cursor.month, lastDay)
        date_list.append([beginningOfMonth, endOfMonth])
        cursor = endOfMonth + timedelta(days=1)

    lat_chunks, lon_chunks, level_chunks, time_chunks, value_chunks = [], [], [], [], []

    for i, date in enumerate(date_list):
        netCDF_name = '_'.join([variable, project, processing_level,
                                processing_version, str(latMin), str(latMax),
                                str(lonMin), str(lonMax),
                                "%04d%02d" % (date[0].year, date[0].month)]) + '.nc'
        netCDF_path = os.path.join(path, netCDF_name)

        if not os.path.exists(netCDF_path):
            print("%s of %s Database Download(s) Complete" % (i, len(date_list)))
            url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, date[0], date[1], cachedir, timestep)
            latitudes, longitudes, levels, values, timestamps = get_data(url)
            create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, date[0], date[1], unit, netCDF_path)

        # Always read back from the cache file so that both paths return
        # the same kind of data.
        latitudes, longitudes, levels, times, values = read_netcdf(netCDF_path)

        lat_chunks.append(np.ravel(np.asarray(latitudes)))
        lon_chunks.append(np.ravel(np.asarray(longitudes)))
        level_chunks.append(np.ravel(np.asarray(levels)))
        time_chunks.append(np.ravel(np.asarray(times)))
        value_chunks.append(np.ravel(np.asarray(values)))

    def join(chunks):
        # Concatenate once at the end instead of re-copying every month.
        return np.concatenate(chunks) if chunks else np.array([])

    return improve_data(join(lat_chunks), join(lon_chunks), join(level_chunks),
                        join(time_chunks), join(value_chunks), timestep)
calendar import monthrange + +def reorderXYT(lons, lats, times, values): + # Re-order values in values array such that when reshaped everywhere is where it should be + # (as DB doesn't necessarily return everything in order) + order = np.lexsort((lons, lats, times)) + counter = 0 + sortedValues = np.zeros_like(values) + sortedLats = np.zeros_like(lats) + sortedLons = np.zeros_like(lons) + for i in order: + sortedValues[counter] = values[i] + sortedLats[counter] = lats[i] + sortedLons[counter] = lons[i] + counter += 1 + + return sortedValues, sortedLats, sortedLons + +def findUnique(seq, idfun=None): + """ + Function to find unique values (used in construction of unique datetime list) + NB. order preserving + Input: seq - a list of randomly ordered values + Output: result - list of ordered values + """ + if idfun is None: + def idfun(x): + return x + + seen = {}; + result = [] + + for item in seq: + marker = idfun(item) + # in old Python versions: + # if seen.has_key(marker) + # but in new ones: + if marker in seen: continue + seen[marker] = 1 + result.append(item) + return result + +def get_param_info(url): + + ''' + This function will get the general information by given URL from the parameter table. + ''' + url = url + "&info=yes" + result = urllib2.urlopen(url) + datastring = result.read() + datastring=json.loads(datastring) + database=datastring["database"] + timestep=datastring["timestep"] + realm=datastring["realm"] + instrument=datastring["instrument"] + start_date=datastring["start_date"] + end_date=datastring["end_date"] + unit=datastring["units"] + + return database, timestep, realm, instrument, start_date, end_date, unit + +def get_data(url): + + ''' + This function will get the url, query from database and will return datapoints' latitude, longitude, level, time and value. 
+ ''' + + result = urllib2.urlopen(url) + datastring = result.read() + d = re.search('data: \r\n', datastring) + data = datastring[d.end():len(datastring)] + + # To create a list of all datapoints + data=data.split('\r\n') + + latitudes = [] + longitudes = [] + levels = [] + values = [] + timestamps = [] + + # To make a series of lists from datapoints + for i in range(len(data)-1): # Because the last row is empty, "len(data)-1" is used. + row=data[i].split(',') + latitudes.append(np.float32(row[0])) + longitudes.append(np.float32(row[1])) + levels.append(np.float32(row[2])) + # timestamps are strings so we will leave them alone for now + timestamps.append(row[3]) + values.append(np.float32(row[4])) + + return latitudes, longitudes, levels, values, timestamps + + +def create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, startTime, endTime, unit, netCD_fileName): + + ''' + This function will generate netCDF files. + ''' + + # To generate netCDF file from database + netcdf = netCDF4.Dataset(netCD_fileName,'w') + string="The netCDF file for parameter: " + database + ", latMin: " + str(latMin) + ", latMax: " + str(latMax) + ", lonMin: " + str(lonMin) + ", lonMax: " + str(lonMax) + " startTime: " + str(startTime) + " and endTime: " + str(endTime) + "." 
+ netcdf.globalAttName = str(string) + netcdf.createDimension('dim', len(latitudes)) + latitude = netcdf.createVariable('lat', 'd', ('dim',)) + longitude = netcdf.createVariable('lon', 'd', ('dim',)) + level = netcdf.createVariable('lev', 'd', ('dim',)) + time = netcdf.createVariable('time', 'd', ('dim',)) + value = netcdf.createVariable('value', 'd', ('dim',)) + + netcdf.variables['lat'].varAttName = 'latitude' + netcdf.variables['lat'].units = 'degrees_north' + netcdf.variables['lon'].varAttName = 'longitude' + netcdf.variables['lon'].units = 'degrees_east' + netcdf.variables['time'].varAttName = 'time' + netcdf.variables['time'].units = 'hours since ' + str(startTime) + netcdf.variables['value'].varAttName = 'value' + netcdf.variables['value'].units = str(unit) + netcdf.variables['lev'].varAttName = 'level' + netcdf.variables['lev'].units = 'hPa' + + hours=[] + timeFormat = "%Y-%m-%d %H:%M:%S" + base_date=startTime + # To convert the date to hours + for t in timestamps: + date=datetime.strptime(t, timeFormat) + diff=date-base_date + hours.append(diff.days*24) + + latitude[:]=latitudes[:] + longitude[:]=longitudes[:] + level[:]=levels[:] + time[:]=hours[:] + value[:]=values[:] + netcdf.close() + +def read_netcdf(netCD_fileName): + + ''' + This function will read the existed netCDF file, convert the hours from netCDF time variable + and return latitudes, longitudes, levels, times and values. 
+ ''' + # To use the created netCDF file + netcdf = netCDF4.Dataset(netCD_fileName , mode='r') + # To get all data from netCDF file + latitudes = netcdf.variables['lat'][:] + longitudes = netcdf.variables['lon'][:] + levels = netcdf.variables['lev'][:] + hours = netcdf.variables['time'][:] + values = netcdf.variables['value'][:] + + # To get the base date + time_unit=netcdf.variables['time'].units.encode() + time_unit=time_unit.split(' ') + base_date=time_unit[2] + " " + time_unit[3] + + netcdf.close() + + timeFormat = "%Y-%m-%d %H:%M:%S" + + # Because time in netCDF file is based on hours since a specific date, it needs to be converted to date format + times=[] + # To convert the base date to the python datetime format + base_date = datetime.strptime(base_date, timeFormat) + for each in range(len(hours)): + hour=timedelta(hours[each]/24) + eachTime=base_date + hour + times.append(str(eachTime.year) + '-' + str("%02d" % (eachTime.month)) + '-' + str("%02d" % (eachTime.day)) + ' ' + str("%02d" % (eachTime.hour)) + ':' + str("%02d" % (eachTime.minute)) + ':' + str("%02d" % (eachTime.second))) + + return latitudes, longitudes, levels, times, values + + +def improve_data(latitudes, longitudes, levels, times, values, timestep): + + # Make arrays of unique latitudes, longitudes, levels and times + uniqueLatitudes = np.unique(latitudes) + uniqueLongitudes = np.unique(longitudes) + uniqueLevels = np.unique(levels) + uniqueTimestamps = np.unique(times) + + # Calculate nx and ny + uniqueLongitudeCount = len(uniqueLongitudes) + uniqueLatitudeCount = len(uniqueLatitudes) + uniqueLevelCount = len(uniqueLevels) + uniqueTimeCount = len(uniqueTimestamps) + + values, latitudes, longitudes = reorderXYT(longitudes, latitudes, times, values) + + # Convert each unique time from strings into list of Python datetime objects + # TODO - LIST COMPS! 
+ timeFormat = "%Y-%m-%d %H:%M:%S" + timesUnique = [datetime.strptime(t, timeFormat) for t in uniqueTimestamps] + timesUnique.sort() + timesUnique = process.normalizeDatetimes(timesUnique, timestep) + + # Reshape arrays + latitudes = latitudes.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) + longitudes = longitudes.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) + levels = np.array(levels).reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) + values = values.reshape(uniqueTimeCount, uniqueLatitudeCount, uniqueLongitudeCount, uniqueLevelCount) + + # Flatten dimension if only single level + if uniqueLevelCount == 1: + values = values[:, :, :, 0] + latitudes = latitudes[0, :, :, 0] + longitudes = longitudes[0, :, :, 0] + + # Created masked array to deal with missing values + # -these make functions like values.mean(), values.max() etc ignore missing values + mdi = -9999 # TODO: extract this value from the DB retrieval metadata + mdata = ma.masked_array(values, mask=(values == mdi)) + + + return latitudes, longitudes, uniqueLevels, timesUnique, mdata + + +def extractData ( datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep ): + + """ + Main function to extract data from DB into numpy masked arrays, and also to create monthly netCDF file as cache + + Input:: + datasetID, paramID: required identifiers of data in database + latMin, latMax, lonMin, lonMax: location range to extract data for + startTime, endTime: python datetime objects describing required time range to extract + cachedir: directory path used to store temporary cache files + timestep: "daily" | "monthly" so we can be sure to query the RCMED properly + Output: + uniqueLatitudes,uniqueLongitudes: 1d-numpy array of latitude and longitude grid values + uniqueLevels: 1d-numpy array of vertical level values + timesUnique: list of python datetime objects 
describing times of returned data + mdata: masked numpy arrays of data values + """ + + url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, userStartTime, userEndTime, cachedir, timestep) + + # To get the parameter's information from parameter table + database, timestep, realm, instrument, dbStartDate, dbEndDate, unit = get_param_info(url) + + # Create a directory inside the cache directory + name = [] + # activity is a fix value + activity = "obs4cmip5" + name.append(activity) + # product is a fix value + product = "observations" + name.append(product) + # realm, variable,frequency and instrument will be get from parameter table + realm = realm + name.append(realm) + variable = database + name.append(variable) + frequency = timestep + name.append(frequency) + data_structure = "grid" + name.append(data_structure) + institution = "NASA" + name.append(institution) + project = "RCMES" + name.append(project) + instrument = instrument + name.append(instrument) + version = "v1" + name.append(version) + + # Check to see whether the folder is already created for netCDF or not, then it will be created + for n in name: + path = os.path.join(cachedir, n) + if os.path.exists(path): + pass + else: + os.mkdir(path) + + processing_level = 'L3' + processing_version = "processing_version" # the processing version is still unknown and can be added later + + timeFormat = "%Y-%m-%d %H:%M:%S" + + date_list, lats, longs, uniqueLevls, uniqueTimes, vals = [], [], [], [], [], [] + + # To make a list (date_list) of all months available based on user time request + while userStartTime <= userEndTime: + #To get the beginning of month + beginningOfMonth = str("%04d" % userStartTime.year) + "-" + str("%02d" % userStartTime.month) + "-" + "01 00:00:00" + #To get the end of month + endOfMonth = str("%04d" % userStartTime.year) + "-" + str("%02d" % userStartTime.month) + "-" + str(monthrange(userStartTime.year,userStartTime.month)[1]) + " 00:00:00" + #To convert both beginning 
and end of month from string to Python datetime format + beginningOfMonth = datetime.strptime(beginningOfMonth, timeFormat) + endOfMonth = datetime.strptime(endOfMonth, timeFormat) + #To add beginning and end of month as a list to the date_list list + date_list.append([beginningOfMonth, endOfMonth]) + #To get the beginning of next month + userStartTime= endOfMonth + timedelta(days=1) + + print 'Starting retrieval data (this may take several minutes) ...... ' + # To loop over all months and return data + for date in date_list: + netCDF_name = variable + '_' + project + '_' + processing_level + '_' + processing_version + '_' + str(latMin) + '_' + str(latMax) + '_' + str(lonMin) + '_' + str(lonMax) + '_' + str("%04d" % date[0].year) + str("%02d" % date[0].month) + '.nc' + + # To check if netCDF file exists, then use it + if os.path.exists(path+"/"+ netCDF_name): + latitudes, longitudes, levels, times, values = read_netcdf(path + "/" + netCDF_name) + + # If the netCDF file does not exist, then create one and read it. 
+ else: + # To just query for one year of data + url = RCMED.jplUrl(datasetID, paramID, latMin, latMax, lonMin, lonMax, date[0], date[1], cachedir, timestep) + + # To get data from DB + latitudes, longitudes, levels, values, timestamps = get_data(url) + create_netCDF(latitudes, longitudes, levels, values, timestamps, database, latMin, latMax, lonMin, lonMax, date[0], date[1], unit, path + "/" + netCDF_name) + + # To read from netCDF files + latitudes, longitudes, levels, times, values = read_netcdf(path + "/" + netCDF_name) + + lats=np.append(lats,latitudes) + longs=np.append(longs,longitudes) + uniqueLevls=np.append(uniqueLevls,levels) + uniqueTimes=np.append(uniqueTimes,times) + vals=np.append(vals,values) + + latitudes, longitudes, uniqueLevels, timesUnique, mdata = improve_data(lats, longs, uniqueLevls, uniqueTimes, vals, timestep) + + return latitudes, longitudes, uniqueLevels, timesUnique, mdata Propchange: incubator/climate/branches/rcmet-2.1.2/src/main/python/rcmes/storage/db.py.old ------------------------------------------------------------------------------ svn:executable = *