From commits-return-2785-archive-asf-public=cust-asf.ponee.io@climate.apache.org Fri Feb 23 06:53:21 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 55CDF18067E for ; Fri, 23 Feb 2018 06:53:21 +0100 (CET) Received: (qmail 91277 invoked by uid 500); 23 Feb 2018 05:53:19 -0000 Mailing-List: contact commits-help@climate.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@climate.apache.org Delivered-To: mailing list commits@climate.apache.org Received: (qmail 91253 invoked by uid 99); 23 Feb 2018 05:53:19 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 23 Feb 2018 05:53:19 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 07142DFE46; Fri, 23 Feb 2018 05:53:19 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: huikyole@apache.org To: commits@climate.apache.org Date: Fri, 23 Feb 2018 05:53:18 -0000 Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: [1/2] climate git commit: CLIMATE-744 Cannot load TRMM data from RCMED Repository: climate Updated Branches: refs/heads/master 1a851ffd0 -> 513dcc438 CLIMATE-744 Cannot load TRMM data from RCMED Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/cc3b36c8 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/cc3b36c8 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/cc3b36c8 Branch: refs/heads/master Commit: cc3b36c87f7e861bd9797fd90fc2156d1aa7df32 Parents: e8d8d42 Author: michaelarthuranderson Authored: Sun Feb 11 19:34:08 2018 -0500 Committer: michaelarthuranderson Committed: Sun Feb 11 19:34:08 2018 -0500 ---------------------------------------------------------------------- ocw/data_source/rcmed.py | 78 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/cc3b36c8/ocw/data_source/rcmed.py ---------------------------------------------------------------------- diff --git a/ocw/data_source/rcmed.py b/ocw/data_source/rcmed.py index 0feb045..69d4628 100644 --- a/ocw/data_source/rcmed.py +++ b/ocw/data_source/rcmed.py @@ -339,9 +339,10 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l parameters_metadata = get_parameters_metadata() parameter_name, time_step, _, _, _, _, parameter_units = _get_parameter_info( parameters_metadata, parameter_id) - url = _generate_query_url(dataset_id, parameter_id, min_lat, - max_lat, min_lon, max_lon, start_time, end_time, time_step) - lats, lons, times, values = _get_data(url) + + lats, lons, times, values = \ + _coalesce_data(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, + start_time, end_time, time_step) unique_lats_lons_times = _make_unique(lats, lons, times) unique_times = _calculate_time(unique_lats_lons_times[2], time_step) @@ -362,3 +363,74 @@ def parameter_dataset(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_l units=parameter_units, name=name, origin=origin) + + +def _coalesce_data(dataset_id, parameter_id, min_lat, max_lat, min_lon, max_lon, + start_time, end_time, time_step): + + """ + Refer to this JIRA: https://issues.apache.org/jira/browse/CLIMATE-744 + + Sometimes RCMED does not seem to return the entire data set when the requested + range of data and / or number of data points are very large. This method breaks + the single large query into several smaller queries and then appends the results. + + :param dataset_id: The RCMED dataset ID. + :param parameter_id: The parameter ID within the RCMED dataset. + :param min_lat: The minimum lat of the dataset boundary. + :param max_lat: The maximum lat of the dataset boundary. + :param min_lon: The minimum lon of the dataset boundary. + :param max_lon: The maximum lon of the dataset boundary. + :param start_time: The start datetime of the dataset boundary. + :param end_time: The end datetime of the dataset boundary. + :param time_step: The timestep to use when segmenting the datetime boundary. + :return: lats, lons, times, and values for the requested dataset / parameter from RCMED. + """ + + lats = None + lons = None + times = None + values = None + + # This is a magic number which strikes a balance between making an excessive number of + # calls to RCMED (e.g. 1) and RCMED not sending back the full data set. + step = 4 + + current_start = start_time + current_end = min(end_time, datetime(current_start.year + step, 12, 31)) + + while True: + + url = _generate_query_url(dataset_id, parameter_id, min_lat, + max_lat, min_lon, max_lon, current_start, current_end, time_step) + + tmp_lats, tmp_lons, tmp_times, tmp_values = _get_data(url) + + if lats is None: + lats = tmp_lats + else: + lats = np.append(lats, tmp_lats) + + if lons is None: + lons = tmp_lons + else: + lons = np.append(lons, tmp_lons) + + if times is None: + times = tmp_times + else: + times = np.append(times, tmp_times) + + if values is None: + values = tmp_values + else: + values = np.append(values, tmp_values) + + if current_end == end_time: + break + + current_start = datetime(current_end.year + 1, 1, 1) + current_end = min(end_time, datetime(current_start.year + step, 12, 31)) + + + return lats, lons, times, values