climate-commits mailing list archives

From huiky...@apache.org
Subject [6/8] climate git commit: CLIMATE-841 - ocw.dataset_processor.subset
Date Wed, 10 Aug 2016 14:28:23 GMT
CLIMATE-841 - ocw.dataset_processor.subset

- dataset_processor.subset now supports two new boundary types: 'us_states' and 'countries'
- examples/subset_TRMM_data_for_NCA_regions.py has been added to exercise the new spatial subsetting
functionality.
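
A minimal usage sketch of the new API (it mirrors the added example script; `my_dataset` is a placeholder name for any ocw Dataset you have already loaded):

    import ocw.dataset_processor as dsp
    from ocw.dataset import Bounds

    # Keep only the grid cells that fall inside the selected US states (extract=True).
    nca_sw_bounds = Bounds(boundary_type='us_states',
                           us_states=['CA', 'NV', 'UT', 'AZ', 'NM', 'CO'])
    sw_dataset = dsp.subset(my_dataset, nca_sw_bounds, extract=True)

    # Mask out the grid cells that fall inside the selected countries (extract=False).
    country_bounds = Bounds(boundary_type='countries', countries=['Cuba', 'Bahamas'])
    masked_dataset = dsp.subset(my_dataset, country_bounds, extract=False)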


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/243a8953
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/243a8953
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/243a8953

Branch: refs/heads/master
Commit: 243a8953fc265229a0cf47b581e7a8f0aa78a9cd
Parents: 036e5fd
Author: huikyole <huikyole@argo.jpl.nasa.gov>
Authored: Mon Aug 8 21:41:08 2016 -0700
Committer: huikyole <huikyole@argo.jpl.nasa.gov>
Committed: Mon Aug 8 21:41:08 2016 -0700

----------------------------------------------------------------------
 examples/subregions.py                        | 53 -------------
 examples/subregions_rectangular_boundaries.py | 53 +++++++++++++
 examples/subset_TRMM_data_for_NCA_regions.py  | 56 +++++++++++++
 ocw/dataset.py                                | 65 +--------------
 ocw/dataset_processor.py                      | 51 +++++++-----
 ocw/utils.py                                  | 92 +++++++++++++++++++++-
 6 files changed, 235 insertions(+), 135 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/examples/subregions.py
----------------------------------------------------------------------
diff --git a/examples/subregions.py b/examples/subregions.py
deleted file mode 100644
index 20aaee9..0000000
--- a/examples/subregions.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#Apache OCW lib immports
-from ocw.dataset import Dataset, Bounds
-import ocw.data_source.local as local
-import ocw.data_source.rcmed as rcmed
-import ocw.dataset_processor as dsp
-import ocw.evaluation as evaluation
-import ocw.metrics as metrics
-import ocw.plotter as plotter
-import ocw.utils as utils
-
-import datetime
-import numpy as np
-import numpy.ma as ma
-
-OUTPUT_PLOT = "subregions"
-
-# Spatial and temporal configurations
-LAT_MIN = -45.0 
-LAT_MAX = 42.24
-LON_MIN = -24.0
-LON_MAX = 60.0 
-START_SUB = datetime.datetime(2000, 01, 1)
-END_SUB = datetime.datetime(2007, 12, 31)
-
-#regridding parameters
-gridLonStep=0.5
-gridLatStep=0.5
-
-#Regrid
-print("... regrid")
-new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
-new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
-
-list_of_regions = [
- Bounds(-10.0, 0.0, 29.0, 36.5, START_SUB, END_SUB), 
- Bounds(0.0, 10.0,  29.0, 37.5, START_SUB, END_SUB),
- Bounds(10.0, 20.0, 25.0, 32.5, START_SUB, END_SUB),
- Bounds(20.0, 33.0, 25.0, 32.5, START_SUB, END_SUB),
- Bounds(-19.3,-10.2,12.0, 20.0, START_SUB, END_SUB),
- Bounds( 15.0, 30.0, 15.0, 25.0,START_SUB, END_SUB),
- Bounds(-10.0, 10.0, 7.3, 15.0, START_SUB, END_SUB),
- Bounds(-10.9, 10.0, 5.0, 7.3,  START_SUB, END_SUB),
- Bounds(33.9, 40.0,  6.9, 15.0, START_SUB, END_SUB),
- Bounds(10.0, 25.0,  0.0, 10.0, START_SUB, END_SUB),
- Bounds(10.0, 25.0,-10.0,  0.0, START_SUB, END_SUB),
- Bounds(30.0, 40.0,-15.0,  0.0, START_SUB, END_SUB),
- Bounds(33.0, 40.0, 25.0, 35.0, START_SUB, END_SUB)]
-
-#for plotting the subregions
-plotter.draw_subregions(list_of_regions, new_lats, new_lons, OUTPUT_PLOT, fmt='png')
-
-                               
-

http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/examples/subregions_rectangular_boundaries.py
----------------------------------------------------------------------
diff --git a/examples/subregions_rectangular_boundaries.py b/examples/subregions_rectangular_boundaries.py
new file mode 100644
index 0000000..20aaee9
--- /dev/null
+++ b/examples/subregions_rectangular_boundaries.py
@@ -0,0 +1,53 @@
+#Apache OCW lib imports
+from ocw.dataset import Dataset, Bounds
+import ocw.data_source.local as local
+import ocw.data_source.rcmed as rcmed
+import ocw.dataset_processor as dsp
+import ocw.evaluation as evaluation
+import ocw.metrics as metrics
+import ocw.plotter as plotter
+import ocw.utils as utils
+
+import datetime
+import numpy as np
+import numpy.ma as ma
+
+OUTPUT_PLOT = "subregions"
+
+# Spatial and temporal configurations
+LAT_MIN = -45.0 
+LAT_MAX = 42.24
+LON_MIN = -24.0
+LON_MAX = 60.0 
+START_SUB = datetime.datetime(2000, 01, 1)
+END_SUB = datetime.datetime(2007, 12, 31)
+
+#regridding parameters
+gridLonStep=0.5
+gridLatStep=0.5
+
+#Regrid
+print("... regrid")
+new_lats = np.arange(LAT_MIN, LAT_MAX, gridLatStep)
+new_lons = np.arange(LON_MIN, LON_MAX, gridLonStep)
+
+list_of_regions = [
+ Bounds(-10.0, 0.0, 29.0, 36.5, START_SUB, END_SUB), 
+ Bounds(0.0, 10.0,  29.0, 37.5, START_SUB, END_SUB),
+ Bounds(10.0, 20.0, 25.0, 32.5, START_SUB, END_SUB),
+ Bounds(20.0, 33.0, 25.0, 32.5, START_SUB, END_SUB),
+ Bounds(-19.3,-10.2,12.0, 20.0, START_SUB, END_SUB),
+ Bounds( 15.0, 30.0, 15.0, 25.0,START_SUB, END_SUB),
+ Bounds(-10.0, 10.0, 7.3, 15.0, START_SUB, END_SUB),
+ Bounds(-10.9, 10.0, 5.0, 7.3,  START_SUB, END_SUB),
+ Bounds(33.9, 40.0,  6.9, 15.0, START_SUB, END_SUB),
+ Bounds(10.0, 25.0,  0.0, 10.0, START_SUB, END_SUB),
+ Bounds(10.0, 25.0,-10.0,  0.0, START_SUB, END_SUB),
+ Bounds(30.0, 40.0,-15.0,  0.0, START_SUB, END_SUB),
+ Bounds(33.0, 40.0, 25.0, 35.0, START_SUB, END_SUB)]
+
+#for plotting the subregions
+plotter.draw_subregions(list_of_regions, new_lats, new_lons, OUTPUT_PLOT, fmt='png')
+
+                               
+

http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/examples/subset_TRMM_data_for_NCA_regions.py
----------------------------------------------------------------------
diff --git a/examples/subset_TRMM_data_for_NCA_regions.py b/examples/subset_TRMM_data_for_NCA_regions.py
new file mode 100644
index 0000000..bc946a2
--- /dev/null
+++ b/examples/subset_TRMM_data_for_NCA_regions.py
@@ -0,0 +1,56 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#Apache OCW lib imports
+import ocw.dataset_processor as dsp
+import ocw.utils as utils
+from ocw.dataset import Bounds
+import ocw.data_source.rcmed as rcmed
+import ocw.plotter as plotter
+
+from datetime import datetime
+import numpy.ma as ma
+
+import ssl
+
+if hasattr(ssl, '_create_unverified_context'):
+  ssl._create_default_https_context = ssl._create_unverified_context
+
+# rectangular boundary
+min_lat = 15.75
+max_lat = 55.75
+min_lon = -125.75
+max_lon = -66.75
+
+start_time = datetime(1998,1,1)
+end_time = datetime(1998,12,31)
+
+TRMM_dataset = rcmed.parameter_dataset(3, 36, min_lat, max_lat, min_lon, max_lon,
+                                            start_time, end_time)
+
+Cuba_and_Bahamas_bounds = Bounds(boundary_type='countries', countries=['Cuba','Bahamas'])
+TRMM_dataset2 = dsp.subset(TRMM_dataset, Cuba_and_Bahamas_bounds, extract=False) # to mask out the data over Cuba and the Bahamas
+
+plotter.draw_contour_map(ma.mean(TRMM_dataset2.values, axis=0), TRMM_dataset2.lats, TRMM_dataset2.lons, fname='TRMM_without_Cuba_and_Bahamas')
+
+NCA_SW_bounds = Bounds(boundary_type='us_states', us_states=['CA','NV','UT','AZ','NM','CO'])
+TRMM_dataset3 = dsp.subset(TRMM_dataset2, NCA_SW_bounds, extract=True) # to extract the data over the six southwestern US states
+
+plotter.draw_contour_map(ma.mean(TRMM_dataset3.values, axis=0), TRMM_dataset3.lats, TRMM_dataset3.lons, fname='TRMM_NCA_SW')
+
+
+

http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/ocw/dataset.py
----------------------------------------------------------------------
diff --git a/ocw/dataset.py b/ocw/dataset.py
index 562c014..3dfc835 100644
--- a/ocw/dataset.py
+++ b/ocw/dataset.py
@@ -298,9 +298,9 @@ class Bounds(object):
             self._end = None
 
         if boundary_type == 'us_states':
-            self.masked_regions = shapefile_boundary(boundary_type, us_states)
+            self.masked_regions = utils.shapefile_boundary(boundary_type, us_states)
         if boundary_type == 'countries':
-            self.masked_regions = shapefile_boundary(boundary_type, countries)
+            self.masked_regions = utils.shapefile_boundary(boundary_type, countries)
         if boundary_type == 'user':
             file_object = netCDF4.Dataset(user_mask_file)
             self.mask_variable = file_object.variables[mask_variable_name][:]
@@ -334,7 +334,7 @@ class Bounds(object):
             self.lon_min = float(lon_min)
             self.lon_max = float(lon_max)
         if boundary_type[:6].upper() == 'CORDEX':
-            self.lat_min, self.lat_max, self.lon_min, self.lon_max = CORDEX_boundary(boundary_type[6:].replace(" ","").lower())
+            self.lat_min, self.lat_max, self.lon_min, self.lon_max = utils.CORDEX_boundary(boundary_type[6:].replace(" ","").lower())
 
     @property
     def start(self):
@@ -364,62 +364,3 @@ class Bounds(object):
 
         self._end = value
 
-def shapefile_boundary(boundary_type, region_names):
-    '''
-    :param boundary_type: The type of spatial subset boundary
-    :type boundary_type: :mod:'string'
-
-    :param region_names: An array of regions for spatial subset
-    :type region_names: :mod:'list'
-    '''
-    # Read the shapefile
-    map_read = Basemap()
-    regions = []
-    shapefile_dir = os.sep.join([os.path.dirname(__file__), 'shape'])
-    map_read.readshapefile(os.path.join(shapefile_dir, boundary_type),
-                           boundary_type)
-    if boundary_type == 'us_states':
-        for region_name in region_names:
-            for iregion, region_info in enumerate(map_read.us_states_info):
-                if region_info['st'] == region_name:
-                    regions.append(numpy.array(map_read.us_states[iregion]))
-    elif boundary_type == 'countries':
-        for region_name in region_names:
-            for iregion, region_info in enumerate(map_read.countries_info):
-                if region_info['COUNTRY'] == region_name:
-                    regions.append(numpy.array(map_read.countries[iregion]))
-    return regions
-
-def CORDEX_boundary(domain_name):
-    '''
-    :param domain_name: CORDEX domain name (http://www.cordex.org/)
-    :type domain_name: :mod:'string'
-    '''
-    if domain_name =='southamerica':
-        return -57.61, 18.50, 254.28-360., 343.02-360.
-    if domain_name =='centralamerica':
-        return -19.46, 34.83, 235.74-360., 337.78-360.
-    if domain_name =='northamerica':
-        return  12.55, 75.88, 189.26-360., 336.74-360.
-    if domain_name =='europe':
-        return  22.20, 71.84, 338.23-360., 64.4
-    if domain_name =='africa':
-        return -45.76, 42.24, 335.36-360., 60.28
-    if domain_name =='southasia':
-        return -15.23, 45.07, 19.88, 115.55
-    if domain_name =='eastasia':
-        return  -0.10, 61.90, 51.59, 179.99
-    if domain_name =='centralasia':
-        return  18.34, 69.37, 11.05, 139.13
-    if domain_name =='australasia':
-        return -52.36, 12.21, 89.25, 179.99
-    if domain_name =='antartica':
-        return -89.48,-56.00, -179.00, 179.00
-    if domain_name =='artic':
-        return 46.06, 89.50, -179.00, 179.00
-    if domain_name =='mediterranean':
-        return  25.63, 56.66, 339.79-360.00, 50.85
-    if domain_name =='middleeastnorthafrica':
-        return  -7.00, 45.00, 333.00-360.00, 76.00
-    if domain_name =='southeastasia':
-        return  -15.14, 27.26, 89.26, 146.96
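
The two helpers removed above are not dropped; they move into ocw/utils.py (see the utils.py hunk further down) and are now called through the utils namespace. A short sketch of the relocated calls (the state codes and domain name below are illustrative):

    import ocw.utils as utils

    # Polygon boundaries read from the bundled shapefiles.
    state_polygons = utils.shapefile_boundary('us_states', ['CA', 'NV'])

    # Rectangular extent (lat_min, lat_max, lon_min, lon_max) of a named CORDEX domain.
    lat_min, lat_max, lon_min, lon_max = utils.CORDEX_boundary('africa')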

http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/ocw/dataset_processor.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py
index 8b817a2..56980c3 100755
--- a/ocw/dataset_processor.py
+++ b/ocw/dataset_processor.py
@@ -16,6 +16,7 @@
 #
 
 from ocw import dataset as ds
+import ocw.utils as utils
 
 import datetime
 import numpy as np
@@ -362,7 +363,7 @@ def ensemble(datasets):
     return ensemble_dataset
 
 
-def subset(target_dataset, subregion, subregion_name=None):
+def subset(target_dataset, subregion, subregion_name=None, extract=True):
     '''Subset given dataset(s) with subregion information
 
     :param subregion: The Bounds with which to subset the target Dataset.
@@ -374,6 +375,9 @@ def subset(target_dataset, subregion, subregion_name=None):
     :param subregion_name: The subset-ed Dataset name
     :type subregion_name: :mod:`string`
 
+    :param extract: If True (default), the data inside the region is extracted; if False, the data inside the region is masked out.
+    :type extract: :mod:`boolean`
+
     :returns: The subset-ed Dataset object
     :rtype: :class:`dataset.Dataset`
 
@@ -387,7 +391,7 @@ def subset(target_dataset, subregion, subregion_name=None):
     if not subregion_name:
         subregion_name = target_dataset.name
 
-    if subregion.lat_min:
+    if hasattr(subregion, 'lat_min'):
         # If boundary_type is 'rectangular' or 'CORDEX ***', ensure that the subregion information is well formed
         _are_bounds_contained_by_dataset(target_dataset, subregion)
 
@@ -435,27 +439,36 @@ def subset(target_dataset, subregion, subregion_name=None):
                     dataset_slices["time_start"]:dataset_slices["time_end"] + 1,
                     dataset_slices["lat_start"]:dataset_slices["lat_end"] + 1,
                     dataset_slices["lon_start"]:dataset_slices["lon_end"] + 1]
-
-        # Build new dataset with subset information
-        return ds.Dataset(
+            # Build new dataset with subset information
+            return ds.Dataset(
             # Slice the lats array with our calculated slice indices
-            target_dataset.lats[dataset_slices["lat_start"]:
-                                dataset_slices["lat_end"] + 1],
+                target_dataset.lats[dataset_slices["lat_start"]:
+                                    dataset_slices["lat_end"] + 1],
             # Slice the lons array with our calculated slice indices
-            target_dataset.lons[dataset_slices["lon_start"]:
-                                dataset_slices["lon_end"] + 1],
+                target_dataset.lons[dataset_slices["lon_start"]:
+                                    dataset_slices["lon_end"] + 1],
             # Slice the times array with our calculated slice indices
-            target_dataset.times[dataset_slices["time_start"]:
-                                 dataset_slices["time_end"] + 1],
+                target_dataset.times[dataset_slices["time_start"]:
+                                     dataset_slices["time_end"] + 1],
             # Slice the values array with our calculated slice indices
-            subset_values,
-            variable=target_dataset.variable,
-            units=target_dataset.units,
-            name=subregion_name,
-            origin=target_dataset.origin
-        )
-
-
+                subset_values,
+                variable=target_dataset.variable,
+                units=target_dataset.units,
+                name=subregion_name,
+                origin=target_dataset.origin
+                )
+
+    if subregion.boundary_type == 'us_states' or subregion.boundary_type == 'countries':
+        start_time_index = np.where(
+            target_dataset.times == subregion.start)[0][0]
+        end_time_index = np.where(target_dataset.times == subregion.end)[0][0]
+        target_dataset = temporal_slice(
+            target_dataset, start_time_index, end_time_index)
+        nt, ny, nx = target_dataset.values.shape
+        spatial_mask = utils.mask_using_shapefile_info(target_dataset.lons, target_dataset.lats, subregion.masked_regions, extract = extract)
+        target_dataset.values = utils.propagate_spatial_mask_over_time(target_dataset.values, mask=spatial_mask)
+        return target_dataset
+            
 def temporal_slice(target_dataset, start_time_index, end_time_index):
     '''Temporally slice given dataset(s) with subregion information. This does not
     spatially subset the target_Dataset

http://git-wip-us.apache.org/repos/asf/climate/blob/243a8953/ocw/utils.py
----------------------------------------------------------------------
diff --git a/ocw/utils.py b/ocw/utils.py
index 2fab66f..0a9ba22 100755
--- a/ocw/utils.py
+++ b/ocw/utils.py
@@ -18,11 +18,13 @@
 ''''''
 
 import sys
+import os
 import datetime as dt
 import numpy as np
 import numpy.ma as ma
 
-from mpl_toolkits.basemap import shiftgrid
+from mpl_toolkits.basemap import shiftgrid, Basemap, maskoceans
+from matplotlib.path import Path
 from dateutil.relativedelta import relativedelta
 from netCDF4 import num2date
 
@@ -445,3 +447,91 @@ def calc_area_weighted_spatial_average(dataset, area_weight=False):
             spatial_average[it] = ma.average(dataset.values[it, :])
 
     return spatial_average
+
+def shapefile_boundary(boundary_type, region_names):
+    '''
+    :param boundary_type: The type of spatial subset boundary
+    :type boundary_type: :mod:'string'
+
+    :param region_names: An array of regions for spatial subset
+    :type region_names: :mod:'list'
+    '''
+    # Read the shapefile
+    map_read = Basemap()
+    regions = []
+    shapefile_dir = os.sep.join([os.path.dirname(__file__), 'shape'])
+    map_read.readshapefile(os.path.join(shapefile_dir, boundary_type),
+                           boundary_type)
+    if boundary_type == 'us_states':
+        for region_name in region_names:
+            for iregion, region_info in enumerate(map_read.us_states_info):
+                if region_info['st'] == region_name:
+                    regions.append(np.array(map_read.us_states[iregion]))
+    elif boundary_type == 'countries':
+        for region_name in region_names:
+            for iregion, region_info in enumerate(map_read.countries_info):
+                if region_info['COUNTRY'].replace(" ","").lower() == region_name.replace(" ","").lower():
+                    regions.append(np.array(map_read.countries[iregion]))
+    return regions
+
+def CORDEX_boundary(domain_name):
+    '''
+    :param domain_name: CORDEX domain name (http://www.cordex.org/)
+    :type domain_name: :mod:'string'
+    '''
+    if domain_name =='southamerica':
+        return -57.61, 18.50, 254.28-360., 343.02-360.
+    if domain_name =='centralamerica':
+        return -19.46, 34.83, 235.74-360., 337.78-360.
+    if domain_name =='northamerica':
+        return  12.55, 75.88, 189.26-360., 336.74-360.
+    if domain_name =='europe':
+        return  22.20, 71.84, 338.23-360., 64.4
+    if domain_name =='africa':
+        return -45.76, 42.24, 335.36-360., 60.28
+    if domain_name =='southasia':
+        return -15.23, 45.07, 19.88, 115.55
+    if domain_name =='eastasia':
+        return  -0.10, 61.90, 51.59, 179.99
+    if domain_name =='centralasia':
+        return  18.34, 69.37, 11.05, 139.13
+    if domain_name =='australasia':
+        return -52.36, 12.21, 89.25, 179.99
+    if domain_name =='antartica':
+        return -89.48,-56.00, -179.00, 179.00
+    if domain_name =='artic':
+        return 46.06, 89.50, -179.00, 179.00
+    if domain_name =='mediterranean':
+        return  25.63, 56.66, 339.79-360.00, 50.85
+    if domain_name =='middleeastnorthafrica':
+        return  -7.00, 45.00, 333.00-360.00, 76.00
+    if domain_name =='southeastasia':
+        return  -15.14, 27.26, 89.26, 146.96
+
+def mask_using_shapefile_info(lons, lats, masked_regions, extract = True):
+    if lons.ndim == 2 and lats.ndim == 2:
+        lons_2d = lons
+        lats_2d = lats
+    else:
+        lons_2d, lats_2d = np.meshgrid(lons, lats)
+
+    points = np.array((lons_2d.flatten(), lats_2d.flatten())).T
+    for iregion, region in enumerate(masked_regions):
+        mpath = Path(region)
+        mask0 = mpath.contains_points(points).reshape(lons_2d.shape)
+        if iregion == 0:
+            mask = mask0
+        else:
+            mask = mask|mask0 
+    if extract:
+        mask = np.invert(mask)
+    return mask
+
+def propagate_spatial_mask_over_time(data_array, mask):
+    if data_array.ndim == 3 and mask.ndim == 2:
+        nt = data_array.shape[0]
+        for it in np.arange(nt):
+            mask_original = data_array[it,:].mask
+            data_array[it,:] = ma.array(data_array[it,:], mask=mask|mask_original)
+        return data_array 
+
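
For reference, a self-contained sketch of how the two new masking helpers compose inside dataset_processor.subset (the 1-D grid, the square region, and the random values are made-up test data, not part of the commit):

    import numpy as np
    import numpy.ma as ma
    import ocw.utils as utils

    # Hypothetical grid and one rectangular region given as a closed ring of (lon, lat) vertices.
    lons = np.arange(-10.0, 10.0, 1.0)
    lats = np.arange(-5.0, 5.0, 1.0)
    region = np.array([[-3.0, -2.0], [3.0, -2.0], [3.0, 2.0], [-3.0, 2.0], [-3.0, -2.0]])

    # With extract=True the returned 2-D mask is True *outside* the region,
    # so applying it keeps only the points that fall inside the polygon.
    spatial_mask = utils.mask_using_shapefile_info(lons, lats, [region], extract=True)

    # Broadcast the 2-D mask over every time step of a (time, lat, lon) masked array.
    values = ma.array(np.random.rand(4, lats.size, lons.size))
    values = utils.propagate_spatial_mask_over_time(values, mask=spatial_mask)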

