climate-commits mailing list archives

From good...@apache.org
Subject svn commit: r1508219 - in /incubator/climate/branches/RefactorInput: ocw/dataset_processor.py rcmet/src/main/python/rcmes/toolkit/process.py
Date Mon, 29 Jul 2013 22:27:29 GMT
Author: goodale
Date: Mon Jul 29 22:27:29 2013
New Revision: 1508219

URL: http://svn.apache.org/r1508219
Log:
CLIMATE-213 - Initial port of process.calc_average_on_new_time_unit_K() to ocw.dataset_processor.py.
* The code will be used to support temporal_rebin() within ocw.dataset_processor.py
* Unit testing and better integration into OCW will come next.
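
As the log notes, this helper is meant to back temporal_rebin(). For orientation, here is a minimal sketch (not part of this commit) of how such a wrapper might call it. The Dataset constructor and attribute names (lats, lons, times, values) are assumptions, as is the availability of create_mask_using_threshold(), which the helper's 3D path still references.

import numpy as np
import numpy.ma as ma

from ocw import dataset as ds
from ocw import dataset_processor as dp


def temporal_rebin_sketch(target_dataset, temporal_resolution):
    # Average the dataset's values onto a coarser time unit ('full', 'annual',
    # 'monthly', 'daily' or 'pentad') using the helper ported in this commit.
    masked_values = ma.asanyarray(target_dataset.values)
    new_values, new_times = dp._rcmes_calc_average_on_new_time_unit_K(
        masked_values, list(target_dataset.times), temporal_resolution)

    # Rebuild a dataset on the original spatial grid with the coarser time axis.
    # The constructor argument order here is an assumption, not taken from this commit.
    return ds.Dataset(target_dataset.lats, target_dataset.lons,
                      np.array(new_times), new_values)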

Modified:
    incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
    incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/toolkit/process.py

Modified: incubator/climate/branches/RefactorInput/ocw/dataset_processor.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/ocw/dataset_processor.py?rev=1508219&r1=1508218&r2=1508219&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/ocw/dataset_processor.py (original)
+++ incubator/climate/branches/RefactorInput/ocw/dataset_processor.py Mon Jul 29 22:27:29 2013
@@ -17,6 +17,7 @@
 
 from ocw import dataset as ds
 
+import datetime
 import numpy as np
 import numpy.ma as ma
 import scipy.interpolate
@@ -195,6 +196,168 @@ def _rcmes_spatial_regrid(spatial_values
 
     return regridded_values
 
+
+def _rcmes_calc_average_on_new_time_unit_K(data, dateList, unit):
+    '''
+    Routine to calculate averages on longer time units than the data exists on.
+    e.g. if the data is 6-hourly, calculate daily or monthly means.
+     
+    Input:
+        data     - data values
+        dateList - list of python datetime structures corresponding to data values
+        unit     - string describing time unit to average onto 
+                      e.g. 'full', 'annual', 'monthly', 'daily', 'pentad'
+    Output:
+        meanstorem - numpy masked array of data values averaged over the required time period
+        newTimesList - a list of python datetime objects representing the data in the new averaging period
+                           NB. currently set to beginning of averaging period, 
+                           i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
+    '''
+
+    # Check if the user-selected temporal grid is valid. If not, EXIT
+    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging: EXIT'
+        return -1, -1
+
+    # Calculate arrays of: annual timeseries: year (2007,2007),
+    #                      monthly time series: year-month (200701,200702),
+    #                      daily timeseries:  year-month-day (20070101,20070102) 
+    #  depending on user-selected averaging period.
+
+    # Year list
+    if unit=='annual':
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year))
+        timeunits = np.array(timeunits, dtype=int)
+         
+    # YearMonth format list
+    if unit=='monthly':
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+        timeunits = np.array(timeunits,dtype=int)
+
+    # YearMonthDay format list
+    if unit=='daily':
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+        timeunits = np.array(timeunits,dtype=int)
+
+    # TODO: add pentad setting using Julian days?
+
+    # Full list: a special case
+    if unit == 'full':
+        comment='Calculating mean data over the entire time range: i.e., annual-mean climatology'
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(999)  # i.e. we just want the same value for all times.
+        timeunits = np.array(timeunits, dtype=int)
+
+    # empty list to store new times
+    newTimesList = []
+
+    # Decide whether or not you need to do any time averaging.
+    #   i.e. if data are already on required time unit then just pass data through and 
+    #        calculate and return representative datetimes.
+    processing_required = True
+    if len(timeunits)==(len(np.unique(timeunits))):
+        processing_required = False
+
+    # 1D data arrays, i.e. time series
+    if data.ndim==1:
+        # Create array to store the resulting data
+        meanstore = np.zeros(len(np.unique(timeunits)))
+  
+        # Calculate the means across each unique time unit
+        i=0
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                datam=ma.masked_array(data,timeunits!=myunit)
+                meanstore[i] = ma.average(datam)
+            
+            # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit)==4:  # YYYY
+                yyyy = int(smyunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit)==6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit)==8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit)==3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1]-dateList[0]
+                halfway = dateList[0]+(dt/2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+
+            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            i = i+1
+
+        # Return the computed 1D means as a masked array, or pass the data
+        # straight through if it is already on the requested time unit.
+        if processing_required:
+            meanstorem = ma.masked_array(meanstore)
+        else:
+            meanstorem = data
+
+    # 3D data arrays
+    if data.ndim==3:
+        # datamask = create_mask_using_threshold(data,threshold=0.75)
+        # Create array to store the resulting data
+        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
+  
+        # Calculate the means across each unique time unit
+        i=0
+        datamask_store = []
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                mask = np.zeros_like(data)
+                mask[timeunits!=myunit,:,:] = 1.0
+                # Calculate missing data mask within each time unit...
+                datamask_at_this_timeunit = np.zeros_like(data)
+                datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
+                # Store results for masking later
+                datamask_store.append(datamask_at_this_timeunit[0])
+                # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
+                datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
+                meanstore[i,:,:] = ma.average(datam,axis=0)
+            # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit)==4:  # YYYY
+                yyyy = int(smyunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit)==6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit)==8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit)==3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1]-dateList[0]
+                halfway = dateList[0]+(dt/2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            i += 1
+
+        if not processing_required:
+            meanstorem = data
+
+        if processing_required:
+            # Create masked array (using missing data mask defined above)
+            datamask_store = np.array(datamask_store)
+            meanstorem = ma.masked_array(meanstore, datamask_store)
+
+    return meanstorem, newTimesList
+
 def _congrid(a, newdims, method='linear', centre=False, minusone=False):
     '''
     This function is from http://wiki.scipy.org/Cookbook/Rebinning - Example 3

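A quick illustration of the ported helper on a small 1D series (not part of the diff; it assumes the RefactorInput branch is importable):

import datetime

import numpy as np

from ocw import dataset_processor as dp

# Four daily values spanning two months.
dates = [datetime.datetime(2007, 1, 30), datetime.datetime(2007, 1, 31),
         datetime.datetime(2007, 2, 1), datetime.datetime(2007, 2, 2)]
values = np.array([1.0, 3.0, 5.0, 7.0])

means, new_times = dp._rcmes_calc_average_on_new_time_unit_K(values, dates, 'monthly')
# means     -> masked array [2.0, 6.0]  (January mean, February mean)
# new_times -> [datetime(2007, 1, 1), datetime(2007, 2, 1)]  (start of each averaging period)
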
Modified: incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/toolkit/process.py
URL: http://svn.apache.org/viewvc/incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/toolkit/process.py?rev=1508219&r1=1508218&r2=1508219&view=diff
==============================================================================
--- incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/toolkit/process.py (original)
+++ incubator/climate/branches/RefactorInput/rcmet/src/main/python/rcmes/toolkit/process.py Mon Jul 29 22:27:29 2013
@@ -162,7 +162,7 @@ def do_regrid(q, lat, lon, lat2, lon2, o
     This function has been moved to the ocw/dataset_processor module
     """
     from ocw import dataset_processor
-    q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=1, mdi=-999999999)
+    q2 = dataset_processor._rcmes_spatial_regrid(q, lat, lon, lat2, lon2, order=1)
 
     return q2
 
@@ -822,165 +822,13 @@ def extract_sub_time_selection(allTimes,
     return subdata
 
 def calc_average_on_new_time_unit_K(data, dateList, unit):
-    '''
-    Routine to calculate averages on longer time units than the data exists on.
-    e.g. if the data is 6-hourly, calculate daily, or monthly means.
-     
-    Input:
-        data     - data values
-        dateList - list of python datetime structures corresponding to data values
-        unit     - string describing time unit to average onto 
-                      e.g. 'monthly', 'daily', 'pentad','annual','decadal'
-    Output:
-        meanstorem - numpy masked array of data values meaned over required time period
-        newTimesList - a list of python datetime objects representing the data in the new averagin period
-                           NB. currently set to beginning of averaging period, 
-                           i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
-    '''
-
-    # Check if the user-selected temporal grid is valid. If not, EXIT
-    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
-    if not acceptable:
-        print 'Error: unknown unit type selected for time averaging: EXIT'
-        return -1,-1,-1,-1
-
-    # Calculate arrays of: annual timeseries: year (2007,2007),
-    #                      monthly time series: year-month (200701,200702),
-    #                      daily timeseries:  year-month-day (20070101,20070102) 
-    #  depending on user-selected averaging period.
-
-    # Year list
-    if unit=='annual':
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year))
-        timeunits = np.array(timeunits, dtype=int)
-         
-    # YearMonth format list
-    if unit=='monthly':
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
-        timeunits = np.array(timeunits,dtype=int)
-
-    # YearMonthDay format list
-    if unit=='daily':
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
-        timeunits = np.array(timeunits,dtype=int)
-
-    # TODO: add pentad setting using Julian days?
-
-    # Full list: a special case
-    if unit == 'full':
-        comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
-        timeunits = []
-        for i in np.arange(len(dateList)):
-            timeunits.append(999)  # i.e. we just want the same value for all times.
-        timeunits = np.array(timeunits, dtype=int)
-
-    # empty list to store new times
-    newTimesList = []
-
-    # Decide whether or not you need to do any time averaging.
-    #   i.e. if data are already on required time unit then just pass data through and 
-    #        calculate and return representative datetimes.
-    processing_required = True
-    if len(timeunits)==(len(np.unique(timeunits))):
-        processing_required = False
-
-    # 1D data arrays, i.e. time series
-    if data.ndim==1:
-        # Create array to store the resulting data
-        meanstore = np.zeros(len(np.unique(timeunits)))
-  
-        # Calculate the means across each unique time unit
-        i=0
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                datam=ma.masked_array(data,timeunits!=myunit)
-                meanstore[i] = ma.average(datam)
-            
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(myunit[0:4])
-                mm = 1
-                dd = 1
-            if len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            if len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            if len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i = i+1
-
-    # 3D data arrays
-    if data.ndim==3:
-        # datamask = create_mask_using_threshold(data,threshold=0.75)
-        # Create array to store the resulting data
-        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-  
-        # Calculate the means across each unique time unit
-        i=0
-        datamask_store = []
-        for myunit in np.unique(timeunits):
-            if processing_required:
-                mask = np.zeros_like(data)
-                mask[timeunits!=myunit,:,:] = 1.0
-                # Calculate missing data mask within each time unit...
-                datamask_at_this_timeunit = np.zeros_like(data)
-                datamask_at_this_timeunit[:]= create_mask_using_threshold(data[timeunits==myunit,:,:],threshold=0.75)
-                # Store results for masking later
-                datamask_store.append(datamask_at_this_timeunit[0])
-                # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
-                datam = ma.masked_array(data,np.logical_or(mask,datamask_at_this_timeunit))
-                meanstore[i,:,:] = ma.average(datam,axis=0)
-            # construct new times list
-            smyunit = str(myunit)
-            if len(smyunit)==4:  # YYYY
-                yyyy = int(smyunit[0:4])
-                mm = 1
-                dd = 1
-            if len(smyunit)==6:  # YYYYMM
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = 1
-            if len(smyunit)==8:  # YYYYMMDD
-                yyyy = int(smyunit[0:4])
-                mm = int(smyunit[4:6])
-                dd = int(smyunit[6:8])
-            if len(smyunit)==3:  # Full time range
-                # Need to set an appropriate time representing the mid-point of the entire time span
-                dt = dateList[-1]-dateList[0]
-                halfway = dateList[0]+(dt/2)
-                yyyy = int(halfway.year)
-                mm = int(halfway.month)
-                dd = int(halfway.day)
-            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-            i += 1
-
-        if not processing_required:
-            meanstorem = data
-
-        if processing_required:
-            # Create masked array (using missing data mask defined above)
-            datamask_store = np.array(datamask_store)
-            meanstorem = ma.masked_array(meanstore, datamask_store)
+    """ 
+    This function has been moved to the ocw/dataset_processor module
+    """
+    from ocw import dataset_processor
+    temporally_regridded_data = dataset_processor._rcmes_calc_average_on_new_time_unit_K(data, dateList, unit)
+    return temporally_regridded_data
 
-    return meanstorem,newTimesList
 
 def decode_model_timesK(ifile,timeVarName,file_type):
     #################################################################################################

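Because calc_average_on_new_time_unit_K() in process.py now simply forwards to the OCW helper and hands back its (means, new_times) tuple, existing toolkit callers keep unpacking two return values. A small illustration, again not part of the diff; the import path is an assumption that depends on how rcmes/ sits on sys.path:

import datetime

import numpy as np

from toolkit import process  # import path assumed

dates = [datetime.datetime(2007, 1, d) for d in (1, 2, 3, 4)]
values = np.array([2.0, 4.0, 6.0, 8.0])

# Daily values averaged onto the 'annual' unit collapse to a single mean (5.0)
# stamped at the start of the averaging period, 2007-01-01.
means, new_times = process.calc_average_on_new_time_unit_K(values, dates, 'annual')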

