Return-Path: X-Original-To: apmail-climate-commits-archive@minotaur.apache.org Delivered-To: apmail-climate-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8494210AE4 for ; Thu, 1 Aug 2013 23:41:54 +0000 (UTC) Received: (qmail 7151 invoked by uid 500); 1 Aug 2013 23:41:54 -0000 Delivered-To: apmail-climate-commits-archive@climate.apache.org Received: (qmail 7130 invoked by uid 500); 1 Aug 2013 23:41:54 -0000 Mailing-List: contact commits-help@climate.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@climate.incubator.apache.org Delivered-To: mailing list commits@climate.incubator.apache.org Received: (qmail 7123 invoked by uid 99); 1 Aug 2013 23:41:54 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 01 Aug 2013 23:41:54 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 01 Aug 2013 23:41:48 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 6C6392388860; Thu, 1 Aug 2013 23:41:26 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1509471 - /incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Date: Thu, 01 Aug 2013 23:41:26 -0000 To: commits@climate.incubator.apache.org From: boustani@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20130801234126.6C6392388860@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: boustani Date: Thu Aug 1 23:41:26 2013 New Revision: 1509471 URL: http://svn.apache.org/r1509471 Log: first version of rcmed.py under data_source folder Added: 
incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Added: incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py?rev=1509471&view=auto ============================================================================== --- incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py (added) +++ incubator/climate/branches/RefactorInput/ocw/data_source/rcmed.py Thu Aug 1 23:41:26 2013 @@ -0,0 +1,451 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +''' +Classes: + RCMED - A class for retrieving data from Regional Climate Model Evaluation Database (JPL). +''' + +import urllib, urllib2 +import re +import json +import numpy as np +import numpy.ma as ma +from datetime import datetime +import calendar +from dataset import Dataset + + +URL = 'http://rcmes.jpl.nasa.gov/query-api/query.php?' 
+ + +''' +def normalizeDatetimes(datetimes, time_step): + """ + Input:: + datetimes - list of datetime objects that need to be normalized + time_step - string of value ('daily' | 'monthly') + Output:: + normalDatetimes - list of datetime objects that have been normalized + + Normalization Rules:: + Daily data will be forced to an hour value of 00:00:00 + Monthly data will be forced to the first of the month at midnight + """ + normalDatetimes = [] + if time_step.lower() == 'monthly': + for inputDatetime in datetimes: + if inputDatetime.day != 1: + # Clean the inputDatetime + inputDatetimeString = inputDatetime.strftime('%Y%m%d') + normalInputDatetimeString = inputDatetimeString[:6] + '01' + inputDatetime = datetime.datetime.strptime(normalInputDatetimeString, '%Y%m%d') + + normalDatetimes.append(inputDatetime) + + elif time_step.lower() == 'daily': + for inputDatetime in datetimes: + if inputDatetime.hour != 0 or inputDatetime.minute != 0 or inputDatetime.second != 0: + datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S') + normalDatetimeString = datetimeString[:8] + '000000' + inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S') + + normalDatetimes.append(inputDatetime) + + + return normalDatetimes +''' + +''' +def _expand_date(start_time, end_time, time_step): + if time_step.lower() == 'monthly': + if start_time.day != 1: + # Clean the startTime + startTimeString = start_time.strftime('%Y%m%d') + normalInputDatetimeString = startTimeString[:6] + '01' + start_time = datetime.strptime(normalInputDatetimeString, '%Y%m%d') + ##TODO: Change the 3 lines above with this line: + ##start_time = datetime(start_time.year, start_time.month, 1) + + + lastDayOfMonth = calendar.monthrange(end_time.year, end_time.month)[1] + if end_time.day != lastDayOfMonth: + # Clean the endTime + endTimeString = end_time.strftime('%Y%m%d') + endTimeString = endTimeString[:6] + str(lastDayOfMonth) + end_time = datetime.strptime(endTimeString, '%Y%m%d') + ##TODO: 
Change the 3 lines above with this line: + ##end_time = datetime(end_time.year, end_time.month, lastDayOfMonth) + + elif time_step.lower() == 'daily': + if start_time.hour != 0 or start_time.minute != 0 or start_time.second != 0: + datetimeString = start_time.strftime('%Y%m%d%H%M%S') + normalDatetimeString = datetimeString[:8] + '000000' + start_time = datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S') + ##TODO: Change the 3 lines above with this line: + ##start_time = datetime(start_time.year, start_time.month, start_time.day, 00, 00, 00) + + endTimeString = end_time.strftime('%Y%m%d%H%M%S') + endTimeString = endTimeString[:8] + '235959' + end_time = datetime.strptime(endTimeString, '%Y%m%d%H%M%S') + ##TODO: Change the 3 lines above with this line: + ##end_time = datetime(end_time.year, end_time.month, end_time.day, 23, 59, 59) + + return start_time, end_time +''' + +''' +def _reshape_arrays(lats, lons, levels, values, unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count): + + # Reshape arrays + lats = lats.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count) + lons = lons.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count) + levels = np.array(levels).reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count) + values = values.reshape(unique_times_count, unique_lats_count, unique_lons_count, unique_levels_count) + + # Flatten dimension if only single level + if unique_levels_count == 1: + values = values[:, :, :, 0] + lats = lats[0, :, :, 0] + lons = lons[0, :, :, 0] + + return lats, lons, levels, values +''' + +''' +def _reorder_data(lats, lons, times, values): + + # Re-order values in values array such that when reshaped everywhere is where it should be + # (as DB doesn't necessarily return everything in order) + order = np.lexsort((lons, lats, times)) + counter = 0 + sorted_values = np.zeros_like(values) + sorted_lats = 
np.zeros_like(lats) + sorted_lons = np.zeros_like(lons) + for i in order: + sorted_values[counter] = values[i] + sorted_lats[counter] = lats[i] + sorted_lons[counter] = lons[i] + counter += 1 + + return sorted_lats, sorted_lons, sorted_values +''' + +''' +def _calculate_len(unique_lat, unique_lon, unique_level, unique_time): + + unique_lats_count = len(unique_lat) + unique_lons_count = len(unique_lon) + unique_levels_count = len(unique_level) + unique_times_count = len(unique_time) + + return unique_lats_count, unique_lons_count, unique_levels_count, unique_times_count +''' + + +def parameters_metadata(): + ''' + ''' + pass + + +def _make_mask_array(values): + ''' + ''' + + # Created masked array to deal with missing values + # -these make functions like values.mean(), values.max() etc ignore missing values + mdi = -9999 # TODO: extract this value from the DB retrieval metadata + values = ma.masked_array(values, mask=(values == mdi)) + + return values + + +def _reshape_values(values, unique_values): + '''Reshape values into 4D array. + + :param values: Raw values data + :type values: numpy array + :param unique_values: Tuple of unique latitudes, longitudes, levels and times data. + :type unique_values: Tuple + + :returns: Reshaped values data + :rtype: Numpy array + ''' + + lats_len = len(unique_values[0]) + lons_len = len(unique_values[1]) + levels_len = len(unique_values[2]) + times_len = len(unique_values[3]) + + values = values.reshape(levels_len, times_len, lats_len, lons_len) + + return values + + +def _calculate_time(unique_times, time_step): + '''Convert each time to the datetime object. + + :param unique_times: Unique time data + :type unique_times: String + :param time_step: Time step + :type time_step: String + + :returns: Unique datetime objects of time data + :rtype: List + ''' + + time_format = "%Y-%m-%d %H:%M:%S" + unique_times = [datetime.strptime(time, time_format) for time in unique_times] + #There is no need to sort time. 
+ #This function may required still in RCMES + #unique_times.sort() + #This function should be moved to the data_process. + #unique_times = normalizeDatetimes(unique_times, time_step) + + return unique_times + + +def _make_unique(lats, lons, levels, times): + '''Find the unique values of input data. + + :param lats: lats + :type lats: Numpy array + :param lons: lons + :type lons: Numpy array + :param levels: levels + :type levels: Numpy array + :param times: times + :type times: Numpy array + + :returns: Unique numpy arrays of latitudes, longitudes, levels and times + :rtype: Tuple + ''' + + unique_lats = np.unique(lats) + unique_lons = np.unique(lons) + unique_levels = np.unique(levels) + unique_times = np.unique(times) + + return (unique_lats, unique_lons, unique_levels, unique_times) + + +def _get_data(url): + '''Reterive data from database. + + :param url: url to query from database + :type url: String + + :returns: Latitudes, longitudes, levels, times and values data + :rtype: (list, list, list, list, list) + ''' + + string = urllib2.urlopen(url) + data_string = string.read() + index_of_data = re.search('data: \r\n', data_string) + data = data_string[index_of_data.end():len(data_string)] + data = data.split('\r\n') + + lats = [] + lons = [] + levels = [] + values = [] + times = [] + + for i in range(len(data) - 1): # Because the last row is empty, "len(data)-1" is used. + row = data[i].split(',') + lats.append(np.float32(row[0])) + lons.append(np.float32(row[1])) + levels.append(np.float32(row[2])) + times.append(row[3]) + values.append(np.float32(row[4])) + + return lats, lons, levels, times, values + + +def _beginning_of_date(time, time_step): + '''Calculate the beginning of given time, based on time step. 
+ + :param time: Given time + :type time: Datetime + :param time_step: Time step (monthly or daily) + :type time_step: String + + :returns: Beginning of given time + :rtype: Datetime + ''' + + if time_step.lower() == 'monthly': + if time.day != 1: + start_time_string = time.strftime('%Y%m%d') + start_time_string = start_time_string[:6] + '01' + time = datetime.strptime(start_time_string, '%Y%m%d') + ##TODO: Change the 3 lines above with this line: + ##time = datetime(time.year, time.month, 1) + elif time_step.lower() == 'daily': + if time.hour != 0 or time.minute != 0 or time.second != 0: + start_time_string = time.strftime('%Y%m%d%H%M%S') + start_time_string = start_time_string[:8] + '000000' + time = datetime.strptime(start_time_string, '%Y%m%d%H%M%S') + ##TODO: Change the 3 lines above with this line: + ##time = datetime(time.year, time.month, time.day, 00, 00, 00) + + return time + + +def _end_of_date(time, time_step): + '''Calculate the end of given time, based on time step. + + :param time: Given time + :type time: Datetime + :param time_step: Time step (monthly or daily) + :type time_step: String + + :returns: End of given time + :rtype: Datetime + ''' + + last_day_of_month = calendar.monthrange(time.year, time.month)[1] + if time.day != last_day_of_month: + end_time_string = time.strftime('%Y%m%d') + end_time_string = end_time_string[:6] + str(last_day_of_month) + time = datetime.strptime(end_time_string, '%Y%m%d') + ##TODO: Change the 3 lines above with this line: + ##time = datetime(time.year, time.month, lastDayOfMonth) + elif time_step.lower() == 'daily': + end_time_string = time.strftime('%Y%m%d%H%M%S') + end_time_string = end_time_string[:8] + '235959' + time = datetime.strptime(end_time_string, '%Y%m%d%H%M%S') + ##TODO: Change the 3 lines above with this line: + ##time = datetime(time.year, time.month, end_time.day, 23, 59, 59) + + return time + + + +def _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, 
end_time, time_step): + '''Generate the url to query from database + + :param dataset_id: Dataset id. + :type dataset_id: Integer + :param parameter_id: Parameter id + :type parameter_id: Integer + :param min_lat: Minimum latitude + :type min_lat: Float + :param max_lat: Maximum latitude + :type max_lat: Float + :param min_lon: Minimum longitude + :type min_lon: Float + :param max_lon: Maximum longitude + :type max_lon: Float + :param start_time: Start time + :type start_time: Datetime + :param end_time: End time + :type end_time: Datetime + :param time_step: Time step + :type time_step: String + + :returns: url to query from database + :rtype: String + ''' + + start_time = _beginning_of_date(start_time, time_step) + end_time = _end_of_date(end_time, time_step) + start_time = start_time.strftime("%Y%m%dT%H%MZ") + end_time = end_time.strftime("%Y%m%dT%H%MZ") + + query = [('datasetId',dataset_id), ('parameterId',parameter_id), ('latMin',min_lat), ('latMax',max_lat), + ('lonMin', min_lon), ('lonMax',max_lon), ('timeStart', start_time), ('timeEnd', end_time)] + + query_url = urllib.urlencode(query) + url_request = URL + query_url + + return url_request + + + +def _get_parameter_info(dataset_id, parameter_id): + '''General information for given parameter id. + + :param dataset_id: Dataset id. 
+ :type dataset_id: Integer + :param parameter_id: Parameter id + :type parameter_id: Integer + + :returns: Database name, time step, realm, instrument, start_date, end_date and unit for given parameter + :rtype: (string, string, string, string, string, string, string) + ''' + + query = [('datasetId',dataset_id), ('parameterId',parameter_id)] + query_url = urllib.urlencode(query) + url = URL + query_url + "&info=yes" + string = urllib2.urlopen(url) + data_string = string.read() + data_string = json.loads(data_string) + database = data_string["database"] + time_step = data_string["timestep"] + realm = data_string["realm"] + instrument = data_string["instrument"] + start_date = data_string["start_date"] + end_date = data_string["end_date"] + unit = data_string["units"] + + return (database, time_step, realm, instrument, start_date, end_date, unit) + + + +def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time): + '''Get data from one database(parameter). + + :param dataset_id: Dataset id. 
+ :type dataset_id: Integer + :param parameter_id: Parameter id + :type parameter_id: Integer + :param min_lat: Minimum latitude + :type min_lat: Float + :param max_lat: Maximum latitude + :type max_lat: Float + :param min_lon: Minimum longitude + :type min_lon: Float + :param max_lon: Maximum longitude + :type max_lon: Float + :param start_time: Start time + :type start_time: Datetime + :param end_time: End time + :type end_time: Datetime + + :returns: Dataset object + :rtype: Object + ''' + + parameter_info = _get_parameter_info(dataset_id, parameter_id) + time_step = parameter_info[1] + parameter_name = parameter_info[0] + url = _generate_query_url(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, start_time, end_time, time_step) + lats, lons, levels, times, values = _get_data(url) + + lats = np.array(lats) + lons = np.array(lons) + times = np.array(times) + values = np.array(values) + + unique_lats_lons_levels_times = _make_unique(lats, lons, levels, times) + unique_times = _calculate_time(unique_lats_lons_levels_times[3], time_step) + values = _reshape_values(values, unique_lats_lons_levels_times) + values = _make_mask_array(values) + + return Dataset(unique_lats_lons_levels_times[0], unique_lats_lons_levels_times[1], unique_times, values, parameter_name) \ No newline at end of file