From commits-return-2069-apmail-climate-commits-archive=climate.apache.org@climate.apache.org Thu Oct 22 00:55:37 2015 Return-Path: X-Original-To: apmail-climate-commits-archive@minotaur.apache.org Delivered-To: apmail-climate-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C87471835D for ; Thu, 22 Oct 2015 00:55:37 +0000 (UTC) Received: (qmail 45990 invoked by uid 500); 22 Oct 2015 00:55:37 -0000 Delivered-To: apmail-climate-commits-archive@climate.apache.org Received: (qmail 45911 invoked by uid 500); 22 Oct 2015 00:55:37 -0000 Mailing-List: contact commits-help@climate.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@climate.apache.org Delivered-To: mailing list commits@climate.apache.org Received: (qmail 45851 invoked by uid 99); 22 Oct 2015 00:55:37 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 22 Oct 2015 00:55:37 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 84A55E38FC; Thu, 22 Oct 2015 00:55:37 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: huikyole@apache.org To: commits@climate.apache.org Date: Thu, 22 Oct 2015 00:55:39 -0000 Message-Id: In-Reply-To: <5200e0e7556b4c92be33178836379ce2@git.apache.org> References: <5200e0e7556b4c92be33178836379ce2@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [3/3] climate git commit: CLIMATE-687 - A new loader to read GPM precipitation data with a file list CLIMATE-687 - A new loader to read GPM precipitation data with a file list - ocw.data_source.local.load_GPM_IMERG_files is added Conflicts: ocw/data_source/local.py Project: http://git-wip-us.apache.org/repos/asf/climate/repo Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/360b5728 Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/360b5728 Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/360b5728 Branch: refs/heads/master Commit: 360b5728f2fb23465a9e897e8136ae08aed92a27 Parents: 6d5c7f8 55d1c4d Author: huikyole Authored: Wed Oct 21 17:54:53 2015 -0700 Committer: huikyole Committed: Wed Oct 21 17:54:53 2015 -0700 ---------------------------------------------------------------------- ocw/data_source/local.py | 53 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/climate/blob/360b5728/ocw/data_source/local.py ---------------------------------------------------------------------- diff --cc ocw/data_source/local.py index 3560b99,50a7f26..9c1d059 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@@ -341,192 -332,56 +342,244 @@@ def load_multiple_files(file_path return datasets, data_name ++<<<<<<< HEAD +def load_WRF_2d_files_RAIN(file_path=None, + filename_pattern=None, + filelist=None, + name=''): + ''' Load multiple WRF (or nuWRF) original output files containing 2D fields such as precipitation and surface variables into a Dataset. + The dataset can be spatially subset. + :param file_path: Directory to the NetCDF file to load. + :type file_path: :mod:`string` + :param filename_pattern: Path to the NetCDF file to load. + :type filename_pattern: :list:`string` + :param name: (Optional) A name for the loaded dataset. + :type name: :mod:`string` + :returns: An OCW Dataset object with the requested variable's data from + the NetCDF file. + :rtype: :class:`dataset.Dataset` + :raises ValueError: + ''' + + if not filelist: + WRF_files = [] + for pattern in filename_pattern: + WRF_files.extend(glob(file_path + pattern)) + WRF_files.sort() + else: + WRF_files=[line.rstrip('\n') for line in open(filelist)] + + file_object_first = netCDF4.Dataset(WRF_files[0]) + lats = file_object_first.variables['XLAT'][0,:] + lons = file_object_first.variables['XLONG'][0,:] + + times = [] + nfile = len(WRF_files) + for ifile, file in enumerate(WRF_files): + print 'Reading file '+str(ifile+1)+'/'+str(nfile), file + file_object = netCDF4.Dataset(file) + time_struct_parsed = strptime(file[-19:],"%Y-%m-%d_%H:%M:%S") + for ihour in range(24): + times.append(datetime(*time_struct_parsed[:6]) + timedelta(hours=ihour)) + if ifile == 0: + values0= file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:] + else: + values0= numpy.concatenate((values0, file_object.variables['RAINC'][:]+file_object.variables['RAINNC'][:])) + file_object.close() + times= numpy.array(times) + years = numpy.array([d.year for d in times]) + ncycle = numpy.unique(years).size + print 'ncycle=',ncycle + nt, ny, nx = values0.shape + values = numpy.zeros([nt-ncycle*24, ny, nx]) + times2 = [] + nt2 = nt/ncycle + # remove the first day in each year + nt3 = nt2-24 + t_index = 0 + for icycle in numpy.arange(ncycle): + for it in numpy.arange(nt3)+24: + values[t_index,:] = values0[icycle*nt2+it,:]-values0[icycle*nt2+it-1,:] + times2.append(times[icycle*nt2+it]) + t_index = t_index +1 + variable_name = 'PREC' + variable_unit= 'mm/hr' + times2 = numpy.array(times2) + return Dataset(lats, lons, times2, values, variable_name, units=variable_unit, name=name) + +def load_dataset_from_multiple_netcdf_files(file_list, variable_name, + lat_name=None, lon_name=None, time_name=None, + name='', file_path=None, filename_pattern=None, + mask_file=None, mask_variable=None, mask_value=0): + ''' Load multiple netCDF files from the same source (an observation or a model) into a Dataset. + The dataset can be spatially subset. + :param filelist: A text file including a list of filenames + :type filelist: :mod:`string` + :param variable_name: The variable name to load from the NetCDF file. + :type variable_name: :mod:`string` + :param lat_name: (Optional) The latitude variable name to extract from the + dataset. + :type lat_name: :mod:`string` + :param lon_name: (Optional) The longitude variable name to extract from the + dataset. + :type lon_name: :mod:`string` + :param time_name: (Optional) The time variable name to extract from the + dataset. + :type time_name: :mod:`string` + :param name: (Optional) A name for the loaded dataset. + :type name: :mod:`string` + :param file_path: Directory to the NetCDF file to load. + :type file_path: :mod:`string` + :param filename_pattern: Path to the NetCDF file to load. + :type filename_pattern: :list:`string` + :param mask_file: A netcdf file with two-dimensional mask indices + :type filelist: :mod:`string` + :param mask_variable: The variable name to load from the mask_file. + :type variable_name: :mod:`string` + :param mask_value: an index for spatial subsetting a dataset + :type mask_value: :class:`int` + :returns: An OCW Dataset object with the requested variable's data from + the NetCDF file. + :rtype: :class:`dataset.Dataset` + :raises ValueError: + ''' + nc_files = [] + if not file_list: + for pattern in filename_pattern: + nc_files.extend(glob(file_path + pattern)) + else: + nc_files = [line.rstrip('\n') for line in open(file_list)] + + nc_files.sort() + + dataset0 = load_file(nc_files[0], variable_name=variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name) + if dataset0.lons.ndim == 1 and dataset0.lats.ndim ==1: + lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats) + elif dataset0.lons.ndim == 2 and dataset0.lats.ndim ==2: + lons = dataset0.lons + lats = dataset0.lats + + if mask_file: + mask_dataset = load_file(mask_file, mask_variable) + y_index, x_index = numpy.where(mask_dataset.values == mask_value) + + times = [] + nfile = len(nc_files) + for ifile, file in enumerate(nc_files): + print 'NC file '+str(ifile+1)+'/'+str(nfile), file + file_object0= load_file(file, variable_name) + values0= file_object0.values + times.extend(file_object0.times) + if mask_file: + values0 = values0[:,y_index, x_index] + if ifile == 0: + data_values = values0 + else: + data_values= numpy.concatenate((data_values, values0)) + times = numpy.array(times) + return Dataset(lats, lons, times, data_values, variable_name, name=name) + +def load_NLDAS_forcingA_files(file_path=None, + filename_pattern=None, + filelist=None, + variable_name='APCPsfc_110_SFC_acc1h', + name=''): + ''' Load multiple NLDAS2 forcingAWRF files containing 2D fields such as precipitation and surface variables into a Dataset. + The dataset can be spatially subset. + :param file_path: Directory to the NetCDF file to load. + :type file_path: :mod:`string` + :param filename_pattern: Path to the NetCDF file to load. + :type filename_pattern: :list:`string` + :param filelist: A list of filenames + :type filelist: :list:`string` + :param variable_name: The variable name to load from the NetCDF file. ++======= + def load_GPM_IMERG_files(file_path=None, + filename_pattern=None, + filelist=None, + variable_name='precipitationCal', + name='GPM_IMERG'): + ''' Load multiple GPM Level 3 IMEGE files containing calibrated precipitation and generate an OCW Dataset obejct. + :param file_path: Directory to the HDF files to load. + :type file_path: :mod:`string` + :param filename_pattern: Path to the HDF files to load. + :type filename_pattern: :list:`string` + :param filelist: A list of filenames + :type filelist: :list:`string` + :param variable_name: The variable name to load from the HDF file. ++>>>>>>> CLIMATE-687 :type variable_name: :mod:`string` :param name: (Optional) A name for the loaded dataset. :type name: :mod:`string` :returns: An OCW Dataset object with the requested variable's data from ++<<<<<<< HEAD + the NetCDF file. + :rtype: :class:`dataset.Dataset` + :raises ValueError: + ''' + + if not filelist: + NLDAS_files = [] + for pattern in filename_pattern: + NLDAS_files.extend(glob(file_path + pattern)) + else: + NLDAS_files = [line.rstrip('\n') for line in open(filelist)] + + NLDAS_files.sort() + + file_object_first = netCDF4.Dataset(NLDAS_files[0]) + lats = file_object_first.variables['lat_110'][:] + lons = file_object_first.variables['lon_110'][:] + lons, lats = numpy.meshgrid(lons, lats) + + times = [] + nfile = len(NLDAS_files) + for ifile, file in enumerate(NLDAS_files): + print 'Reading file '+str(ifile+1)+'/'+str(nfile), file + file_object = netCDF4.Dataset(file) + time_struct_parsed = strptime(file[-20:-7],"%Y%m%d.%H%M") + times.append(datetime(*time_struct_parsed[:6])) + + values0 = file_object.variables[variable_name][:] + values0 = numpy.expand_dims(values0, axis=0) + if ifile == 0: + values = values0 + variable_unit = file_object.variables[variable_name].units ++======= + the HDF file. + :rtype: :class:`dataset.Dataset` + :raises ValueError: + ''' + + if not filelist: + GPM_files = [] + for pattern in filename_pattern: + GPM_files.extend(glob(file_path + pattern)) + else: + GPM_files = [line.rstrip('\n') for line in open(filelist)] + + GPM_files.sort() + + file_object_first = h5py.File(GPM_files[0]) + lats = file_object_first['Grid']['lat'][:] + lons = file_object_first['Grid']['lon'][:] + + lons, lats = numpy.meshgrid(lons, lats) + + variable_unit = "mm/hr" + + times = [] + nfile = len(GPM_files) + for ifile, file in enumerate(GPM_files): + print 'Reading file '+str(ifile+1)+'/'+str(nfile), file + file_object = h5py.File(file) + time_struct_parsed = strptime(file[-39:-23],"%Y%m%d-S%H%M%S") + times.append(datetime(*time_struct_parsed[:6])) + values0= numpy.transpose(ma.masked_less(file_object['Grid'][variable_name][:], 0.)) + values0= numpy.expand_dims(values0, axis=0) + if ifile == 0: + values = values0 ++>>>>>>> CLIMATE-687 else: values = numpy.concatenate((values, values0)) file_object.close()