climate-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From whiteh...@apache.org
Subject svn commit: r1506283 - /incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py
Date Tue, 23 Jul 2013 21:07:47 GMT
Author: whitehall
Date: Tue Jul 23 21:07:47 2013
New Revision: 1506283

URL: http://svn.apache.org/r1506283
Log:
CLIMATE-67
Adding functionality to open hourly data in RCMES

Modified:
    incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py

Modified: incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py
URL: http://svn.apache.org/viewvc/incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py?rev=1506283&r1=1506282&r2=1506283&view=diff
==============================================================================
--- incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py (original)
+++ incubator/climate/trunk/rcmet/src/main/python/rcmes/toolkit/process.py Tue Jul 23 21:07:47 2013
@@ -14,7 +14,7 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 #
-"""Collection of functions that process numpy arrays of varying shapes.  
+"""Collection of functions that process numpy arrays of varying shapes.
 Functions can range from subsetting to regridding"""
 
 # Python Standard Library Imports
@@ -35,29 +35,29 @@ from scipy.ndimage import map_coordinate
 def extract_subregion_from_data_array(data, lats, lons, latmin, latmax, lonmin, lonmax):
     '''
      Extract a sub-region from a data array.
-     
+
      Example::
        **e.g.** the user may load a global model file, but only want to examine data over North America
             This function extracts a sub-domain from the original data.
             The defined sub-region must be a regular lat/lon bounding box,
             but the model data may be on a non-regular grid (e.g. rotated, or Gaussian grid layout).
-            Data are kept on the original model grid and a rectangular (in model-space) region 
+            Data are kept on the original model grid and a rectangular (in model-space) region
             is extracted which contains the rectangular (in lat/lon space) user supplied region.
-    
+
      INPUT::
        data - 3d masked data array
        lats - 2d array of latitudes corresponding to data array
        lons - 2d array of longitudes corresponding to data array
        latmin, latmax, lonmin, lonmax - bounding box of required region to extract
-       
+
      OUTPUT::
        data2 - subset of original data array containing only data from required subregion
        lats2 - subset of original latitude data array
        lons2 - subset of original longitude data array
 
     '''
-    
-    
+
+
 
     # Mask model data array to find grid points inside the supplied bounding box
     whlat = (lats > latmin) & (lats < latmax)
@@ -83,37 +83,37 @@ def extract_subregion_from_data_array(da
 def calc_area_mean(data, lats, lons, mymask='not set'):
     '''
      Calculate Area Average of data in a masked array
-     
+
      INPUT::
          data:  a masked array of data (NB. only data from one time expected to be passed at once)
          lats:  2d array of regularly gridded latitudes
          lons:  2d array of regularly gridded longitudes
          mymask:  (optional) defines spatial region to do averaging over
-     
+
      OUTPUT::
          area_mean: a value for the mean inside the area
 
     '''
-    
-    
+
+
 
     # If mask not passed in, then set mask to cover whole data domain
     if(mymask == 'not set'):
-       mymask = np.empty(data.shape)
-       mymask[:] = False	# NB. mask means (don't show), so False everywhere means use everything.
-   
+        mymask = np.empty(data.shape)
+        mymask[:] = False        # NB. mask means (don't show), so False everywhere means use everything.
+
     # Dimension check on lats, lons
     #  Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
     #  This bit of code just converts to the required 2d array shape
     if(len(lats.shape) == 3):
-       lats = lats[0, :, :]
+        lats = lats[0, :, :]
 
     if(len(lons.shape) == 3):
-       lons = lons[0, :, :]
+        lons = lons[0, :, :]
 
     if(np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1)):
-       lons, lats = np.meshgrid(lons, lats)
- 
+        lons, lats = np.meshgrid(lons, lats)
+
     # Calculate grid length (assuming regular lat/lon grid)
     dlat = lats[1, 0] - lats[0, 0]
     dlon = lons[0, 1] - lons[0, 0]
@@ -124,28 +124,28 @@ def calc_area_mean(data, lats, lons, mym
     # Create a new masked array covering just user selected area (defined in mymask)
     #   NB. this preserves missing data points in the observations data
     subdata = ma.masked_array(data, mask=mymask)
- 
+
     if(myweights.shape != subdata.shape):
-       myweights.resize(subdata.shape)
-       myweights[1:, :] = myweights[0, :]
+        myweights.resize(subdata.shape)
+        myweights[1:, :] = myweights[0, :]
 
     # Calculate weighted mean using ma.average (which takes weights)
     area_mean = ma.average(subdata, weights=myweights)
- 
+
     return area_mean
 
 def calc_area_in_grid_box(latitude, dlat, dlon):
     '''
      Calculate area of regular lat-lon grid box
-     
+
      INPUT::
         latitude: latitude of grid box (degrees)
         dlat:     grid length in latitude direction (degrees)
         dlon:     grid length in longitude direction (degrees)
-        
+
      OUTPUT::
         A:        area of the grid box
-      
+
       '''
 
     R = 6371000  # radius of Earth in metres
@@ -160,17 +160,17 @@ def calc_area_in_grid_box(latitude, dlat
 def do_regrid(q, lat, lon, lat2, lon2, order=1, mdi= -999999999):
     '''
      Perform regridding from one set of lat,lon values onto a new set (lat2,lon2)
-    
+
      Input::
          q          - the variable to be regridded
          lat,lon    - original co-ordinates corresponding to q values
-         lat2,lon2  - new set of latitudes and longitudes that you want to regrid q onto 
+         lat2,lon2  - new set of latitudes and longitudes that you want to regrid q onto
          order      - (optional) interpolation order 1=bi-linear, 3=cubic spline
-         mdi  	    - (optional) fill value for missing data (used in creation of masked array)
-      
+         mdi        - (optional) fill value for missing data (used in creation of masked array)
+
      Output::
-         q2  - q regridded onto the new set of lat2,lon2 
-    
+         q2  - q regridded onto the new set of lat2,lon2
+
     '''
 
     nlat = q.shape[0]
@@ -179,7 +179,7 @@ def do_regrid(q, lat, lon, lat2, lon2, o
     nlat2 = lat2.shape[0]
     nlon2 = lon2.shape[1]
 
-    # To make our lives easier down the road, let's 
+    # To make our lives easier down the road, let's
     # turn these into arrays of x & y coords
     loni = lon2.ravel()
     lati = lat2.ravel()
@@ -190,20 +190,20 @@ def do_regrid(q, lat, lon, lat2, lon2, o
     # Now, we'll set points outside the boundaries to lie along an edge
     loni[loni > lon.max()] = lon.max()
     loni[loni < lon.min()] = lon.min()
-    
+
     # To deal with the "hard" break, we'll have to treat y differently,
     # so we're just setting the min here...
     lati[lati > lat.max()] = lat.max()
     lati[lati < lat.min()] = lat.min()
-    
-    
+
+
     # We need to convert these to (float) indices
     #   (xi should range from 0 to (nx - 1), etc)
     loni = (nlon - 1) * (loni - lon.min()) / (lon.max() - lon.min())
-    
+
     # Deal with the "hard" break in the y-direction
     lati = (nlat - 1) * (lati - lat.min()) / (lat.max() - lat.min())
-    
+
     # Notes on dealing with MDI when regridding data.
     #  Method adopted here:
     #    Use bilinear interpolation of data by default (but user can specify other order using order=... in call)
@@ -212,7 +212,7 @@ def do_regrid(q, lat, lon, lat2, lon2, o
     #            -this is achieved by looking for any non-zero interpolated mask values.
     #    To avoid issues with bilinear interpolation producing strong gradients leading into the MDI,
     #     set values at MDI points to mean data value so little gradient visible = not ideal, but acceptable for now.
-    
+
     # Set values in MDI so that similar to surroundings so don't produce large gradients when interpolating
     # Preserve MDI mask, by only changing data part of masked array object.
     for shift in (-1, 1):
@@ -222,17 +222,17 @@ def do_regrid(q, lat, lon, lat2, lon2, o
             q.data[idx] = q_shifted[idx]
 
     # Now we actually interpolate
-    # map_coordinates does cubic interpolation by default, 
+    # map_coordinates does cubic interpolation by default,
     # use "order=1" to preform bilinear interpolation instead...
     q2 = map_coordinates(q, [lati, loni], order=order)
     q2 = q2.reshape([nlat2, nlon2])
 
     # Set values to missing data outside of original domain
-    q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(), 
-                                                              lat2 <= lat.min()), 
-                                                np.logical_or(lon2 <= lon.min(), 
+    q2 = ma.masked_array(q2, mask=np.logical_or(np.logical_or(lat2 >= lat.max(),
+                                                              lat2 <= lat.min()),
+                                                np.logical_or(lon2 <= lon.min(),
                                                               lon2 >= lon.max())))
-    
+
     # Make second map using nearest neighbour interpolation -use this to determine locations with MDI and mask these
     qmdi = np.zeros_like(q)
     qmdi[q.mask == True] = 1.
@@ -240,303 +240,303 @@ def do_regrid(q, lat, lon, lat2, lon2, o
     qmdi_r = map_coordinates(qmdi, [lati, loni], order=order)
     qmdi_r = qmdi_r.reshape([nlat2, nlon2])
     mdimask = (qmdi_r != 0.0)
-    
+
     # Combine missing data mask, with outside domain mask define above.
     q2.mask = np.logical_or(mdimask, q2.mask)
 
     return q2
 
 def create_mask_using_threshold(masked_array, threshold=0.5):
-   '''
-    Routine to create a mask, depending on the proportion of times with missing data.
-    For each pixel, calculate proportion of times that are missing data,
-    **if** the proportion is above a specified threshold value,
-    then mark the pixel as missing data.
-   
-    Input::
-        masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2).
-        threshold    - (optional) threshold proportion above which a pixel is marked as 'missing data'. 
-        NB. default threshold = 50%
-   
-    Output::
-        mymask       - a numpy array describing the mask. NB. not a masked array, just the mask itself.
+    '''
+     Routine to create a mask, depending on the proportion of times with missing data.
+     For each pixel, calculate proportion of times that are missing data,
+     **if** the proportion is above a specified threshold value,
+     then mark the pixel as missing data.
 
-   '''
-   
+     Input::
+         masked_array - a numpy masked array of data (assumes time on axis 0, and space on axes 1 and 2).
+         threshold    - (optional) threshold proportion above which a pixel is marked as 'missing data'.
+         NB. default threshold = 50%
+
+     Output::
+         mymask       - a numpy array describing the mask. NB. not a masked array, just the mask itself.
+
+    '''
 
-   # try, except used as some model files don't have a full mask, but a single bool
-   #  the except catches this situation and deals with it appropriately.
-   try:
-       nT = masked_array.mask.shape[0]
-
-       # For each pixel, count how many times are masked.
-       nMasked = masked_array.mask[:, :, :].sum(axis=0)
-
-       # Define new mask as when a pixel has over a defined threshold ratio of masked data
-       #   e.g. if the threshold is 75%, and there are 10 times,
-       #        then a pixel will be masked if more than 5 times are masked.
-       mymask = nMasked > (nT * threshold)
 
-   except:
-       mymask = np.zeros_like(masked_array.data[0, :, :])
+    # try, except used as some model files don't have a full mask, but a single bool
+    #  the except catches this situation and deals with it appropriately.
+    try:
+        nT = masked_array.mask.shape[0]
+
+        # For each pixel, count how many times are masked.
+        nMasked = masked_array.mask[:, :, :].sum(axis=0)
+
+        # Define new mask as when a pixel has over a defined threshold ratio of masked data
+        #   e.g. if the threshold is 75%, and there are 10 times,
+        #        then a pixel will be masked if more than 5 times are masked.
+        mymask = nMasked > (nT * threshold)
+
+    except:
+        mymask = np.zeros_like(masked_array.data[0, :, :])
 
-   return mymask
+    return mymask
 
 def calc_average_on_new_time_unit(data, dateList, unit='monthly'):
-   '''
-    Routine to calculate averages on longer time units than the data exists on.
-    
-    Example::
-        If the data is 6-hourly, calculate daily, or monthly means.
-   
-    Input::
-        data     - data values
-        dateList - list of python datetime structures corresponding to data values
-        unit     - string describing time unit to average onto. 
-        *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'*
-      
-     Output:
-        meanstorem - numpy masked array of data values meaned over required time period
-        newTimesList - a list of python datetime objects representing the data in the new averagin period
-        *NB.* currently set to beginning of averaging period, 
-        **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.**
-   '''
+    '''
+     Routine to calculate averages on longer time units than the data exists on.
+
+     Example::
+         If the data is 6-hourly, calculate daily, or monthly means.
 
-   # First catch unknown values of time unit passed in by user
-   acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
+     Input::
+         data     - data values
+         dateList - list of python datetime structures corresponding to data values
+         unit     - string describing time unit to average onto.
+         *Valid values are: 'monthly', 'daily', 'pentad','annual','decadal'*
+
+      Output:
+         meanstorem - numpy masked array of data values meaned over required time period
+         newTimesList - a list of python datetime objects representing the data in the new averaging period
+         *NB.* currently set to beginning of averaging period,
+         **i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.**
+    '''
 
-   if not acceptable:
-      print 'Error: unknown unit type selected for time averaging'
-      print '       Please check your code.'
-      return
+    # First catch unknown values of time unit passed in by user
+    acceptable = (unit == 'full') | (unit == 'annual') | (unit == 'monthly') | (unit == 'daily') | (unit == 'pentad')
 
-   # Calculate arrays of years (2007,2007),
-   #                     yearsmonths (200701,200702),
-   #                     or yearmonthdays (20070101,20070102) 
-   #  -depending on user selected averaging period.
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging'
+        print '       Please check your code.'
+        return
+
+    # Calculate arrays of years (2007,2007),
+    #                     yearsmonths (200701,200702),
+    #                     or yearmonthdays (20070101,20070102)
+    #  -depending on user selected averaging period.
+
+    # Year list
+    if unit == 'annual':
+        print 'Calculating annual mean data'
+        timeunits = []
 
-   # Year list
-   if unit == 'annual':
-      print 'Calculating annual mean data'
-      timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year))
 
-      for i in np.arange(len(dateList)):
-         timeunits.append(str(dateList[i].year))
+        timeunits = np.array(timeunits, dtype=int)
 
-      timeunits = np.array(timeunits, dtype=int)
-         
-   # YearMonth format list
-   if unit == 'monthly':
-      print 'Calculating monthly mean data'
-      timeunits = []
+    # YearMonth format list
+    if unit == 'monthly':
+        print 'Calculating monthly mean data'
+        timeunits = []
 
-      for i in np.arange(len(dateList)):
-         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
 
-      timeunits = np.array(timeunits, dtype=int)
+        timeunits = np.array(timeunits, dtype=int)
 
-   # YearMonthDay format list
-   if unit == 'daily':
-      print 'Calculating daily mean data'
-      timeunits = []
- 
-      for i in np.arange(len(dateList)):
-         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \
-                          str("%02d" % dateList[i].day))
+    # YearMonthDay format list
+    if unit == 'daily':
+        print 'Calculating daily mean data'
+        timeunits = []
 
-      timeunits = np.array(timeunits, dtype=int)
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + \
+                             str("%02d" % dateList[i].day))
 
+        timeunits = np.array(timeunits, dtype=int)
 
-   # TODO: add pentad setting using Julian days?
 
+    # TODO: add pentad setting using Julian days?
 
-   # Full list: a special case
-   if unit == 'full':
-      print 'Calculating means data over full time range'
-      timeunits = []
 
-      for i in np.arange(len(dateList)):
-         timeunits.append(999)  # i.e. we just want the same value for all times.
+    # Full list: a special case
+    if unit == 'full':
+        print 'Calculating means data over full time range'
+        timeunits = []
 
-      timeunits = np.array(timeunits, dtype=int)
+        for i in np.arange(len(dateList)):
+            timeunits.append(999)  # i.e. we just want the same value for all times.
 
+        timeunits = np.array(timeunits, dtype=int)
 
 
-   # empty list to store new times
-   newTimesList = []
 
-   # Decide whether or not you need to do any time averaging.
-   #   i.e. if data are already on required time unit then just pass data through and 
-   #        calculate and return representative datetimes.
-   processing_required = True
-   if len(timeunits) == (len(np.unique(timeunits))):
+    # empty list to store new times
+    newTimesList = []
+
+    # Decide whether or not you need to do any time averaging.
+    #   i.e. if data are already on required time unit then just pass data through and
+    #        calculate and return representative datetimes.
+    processing_required = True
+    if len(timeunits) == (len(np.unique(timeunits))):
         processing_required = False
 
-   # 1D data arrays, i.e. time series
-   if data.ndim == 1:
-     # Create array to store the resulting data
-     meanstore = np.zeros(len(np.unique(timeunits)))
-  
-     # Calculate the means across each unique time unit
-     i = 0
-     for myunit in np.unique(timeunits):
-       if processing_required:
-         datam = ma.masked_array(data, timeunits != myunit)
-         meanstore[i] = ma.average(datam)
-
-       # construct new times list
-       smyunit = str(myunit)
-       if len(smyunit) == 4:  # YYYY
-        yyyy = int(myunit[0:4])
-        mm = 1
-        dd = 1
-       if len(smyunit) == 6:  # YYYYMM
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = 1
-       if len(smyunit) == 8:  # YYYYMMDD
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = int(smyunit[6:8])
-       if len(smyunit) == 3:  # Full time range
-        # Need to set an appropriate time representing the mid-point of the entire time span
-        dt = dateList[-1] - dateList[0]
-        halfway = dateList[0] + (dt / 2)
-        yyyy = int(halfway.year)
-        mm = int(halfway.month)
-        dd = int(halfway.day)
-  
-       newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
-       i = i + 1
-
-   # 3D data arrays
-   if data.ndim == 3:
-
-     #datamask = create_mask_using_threshold(data,threshold=0.75)
-
-     # Create array to store the resulting data
-     meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]])
-  
-     # Calculate the means across each unique time unit
-     i = 0
-     datamask_store = []
-     for myunit in np.unique(timeunits):
-       if processing_required:
-
-         mask = np.zeros_like(data)
-         mask[timeunits != myunit, :, :] = 1.0
-
-         # Calculate missing data mask within each time unit...
-         datamask_at_this_timeunit = np.zeros_like(data)
-         datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75)
-         # Store results for masking later
-         datamask_store.append(datamask_at_this_timeunit[0])
-
-         # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
-         datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit))
-         meanstore[i, :, :] = ma.average(datam, axis=0)
-
-       # construct new times list
-       smyunit = str(myunit)
-       if len(smyunit) == 4:  # YYYY
-        yyyy = int(smyunit[0:4])
-        mm = 1
-        dd = 1
-       if len(smyunit) == 6:  # YYYYMM
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = 1
-       if len(smyunit) == 8:  # YYYYMMDD
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = int(smyunit[6:8])
-       if len(smyunit) == 3:  # Full time range
-        # Need to set an appropriate time representing the mid-point of the entire time span
-        dt = dateList[-1] - dateList[0]
-        halfway = dateList[0] + (dt / 2)
-        yyyy = int(halfway.year)
-        mm = int(halfway.month)
-        dd = int(halfway.day)
-
-       newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
-
-       i += 1
-
-     if not processing_required:
-        meanstorem = data
-
-     if processing_required:
-        # Create masked array (using missing data mask defined above)
-        datamask_store = np.array(datamask_store)
-        meanstorem = ma.masked_array(meanstore, datamask_store)
+    # 1D data arrays, i.e. time series
+    if data.ndim == 1:
+        # Create array to store the resulting data
+        meanstore = np.zeros(len(np.unique(timeunits)))
+
+        # Calculate the means across each unique time unit
+        i = 0
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                datam = ma.masked_array(data, timeunits != myunit)
+                meanstore[i] = ma.average(datam)
+
+            # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit) == 4:  # YYYY
+                yyyy = int(myunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit) == 6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit) == 8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit) == 3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1] - dateList[0]
+                halfway = dateList[0] + (dt / 2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+
+            newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+            i = i + 1
+
+    # 3D data arrays
+    if data.ndim == 3:
+
+        #datamask = create_mask_using_threshold(data,threshold=0.75)
+
+        # Create array to store the resulting data
+        meanstore = np.zeros([len(np.unique(timeunits)), data.shape[1], data.shape[2]])
 
-   return meanstorem, newTimesList
+        # Calculate the means across each unique time unit
+        i = 0
+        datamask_store = []
+        for myunit in np.unique(timeunits):
+            if processing_required:
+
+                mask = np.zeros_like(data)
+                mask[timeunits != myunit, :, :] = 1.0
+
+                # Calculate missing data mask within each time unit...
+                datamask_at_this_timeunit = np.zeros_like(data)
+                datamask_at_this_timeunit[:] = create_mask_using_threshold(data[timeunits == myunit, :, :], threshold=0.75)
+                # Store results for masking later
+                datamask_store.append(datamask_at_this_timeunit[0])
+
+                # Calculate means for each pixel in this time unit, ignoring missing data (using masked array).
+                datam = ma.masked_array(data, np.logical_or(mask, datamask_at_this_timeunit))
+                meanstore[i, :, :] = ma.average(datam, axis=0)
+
+            # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit) == 4:  # YYYY
+                yyyy = int(smyunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit) == 6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit) == 8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit) == 3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1] - dateList[0]
+                halfway = dateList[0] + (dt / 2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+
+            newTimesList.append(datetime.datetime(yyyy, mm, dd, 0, 0, 0, 0))
+
+            i += 1
+
+        if not processing_required:
+            meanstorem = data
+
+        if processing_required:
+            # Create masked array (using missing data mask defined above)
+            datamask_store = np.array(datamask_store)
+            meanstorem = ma.masked_array(meanstore, datamask_store)
+
+    return meanstorem, newTimesList
 
 def calc_running_accum_from_period_accum(data):
     '''
      Routine to calculate running total accumulations from individual period accumulations.
      ::
-    
+
          e.g.  0,0,1,0,0,2,2,1,0,0
             -> 0,0,1,1,1,3,5,6,6,6
-    
+
      Input::
           data: numpy array with time in the first axis
-    
+
      Output::
           running_acc: running accumulations
-    
+
     '''
-    
-    
+
+
     running_acc = np.zeros_like(data)
-    
+
     if(len(data.shape) == 1):
         running_acc[0] = data[0]
-    
+
     if(len(data.shape) > 1):
         running_acc[0, :] = data[0, :]
-    
+
     for i in np.arange(data.shape[0] - 1):
         if(len(data.shape) == 1):
             running_acc[i + 1] = running_acc[i] + data[i + 1]
-    
+
         if(len(data.shape) > 1):
             running_acc[i + 1, :] = running_acc[i, :] + data[i + 1, :]
-    
+
     return running_acc
 
 def ignore_boundaries(data, rim=10):
     '''
      Routine to mask the lateral boundary regions of model data to ignore them from calculations.
-     
+
      Input::
          data - a masked array of model data
          rim - (optional) number of grid points to ignore
-    
+
      Output::
          data - data array with boundary region masked
-    
+
     '''
-    
+
     nx = data.shape[1]
     ny = data.shape[0]
-    
+
     rimmask = np.zeros_like(data)
     for j in np.arange(rim):
         rimmask[j, 0:nx - 1] = 1.0
-    
+
     for j in ny - 1 - np.arange(rim):
         rimmask[j, 0:nx - 1] = 1.0
-    
+
     for i in np.arange(rim):
         rimmask[0:ny - 1, i] = 1.0
-    
+
     for i in nx - 1 - np.arange(rim):
         rimmask[0:ny - 1, i] = 1.0
-    
+
     data = ma.masked_array(data, mask=rimmask)
-    
+
     return data
 
 def normalizeDatetimes(datetimes, timestep):
@@ -546,10 +546,10 @@ def normalizeDatetimes(datetimes, timest
         timestep - string of value ('daily' | 'monthly')
     Output::
         normalDatetimes - list of datetime objects that have been normalized
-    
+
     Normalization Rules::
         Daily data will be forced to an hour value of 00:00:00
-        Monthly data will be forced to the first of the month at midnight 
+        Monthly data will be forced to the first of the month at midnight
     """
     normalDatetimes = []
     if timestep.lower() == 'monthly':
@@ -568,7 +568,7 @@ def normalizeDatetimes(datetimes, timest
                 datetimeString = inputDatetime.strftime('%Y%m%d%H%M%S')
                 normalDatetimeString = datetimeString[:8] + '000000'
                 inputDatetime = datetime.datetime.strptime(normalDatetimeString, '%Y%m%d%H%M%S')
-            
+
             normalDatetimes.append(inputDatetime)
 
 
@@ -600,7 +600,7 @@ def getModelTimes(modelFile, timeVarName
     except AttributeError:
         print 'Error decoding model times: time variable attributes do not contain "since"'
         raise
-    
+
     units = None
     TIME_UNITS = ('minutes', 'hours', 'days', 'months', 'years')
     # search for 'seconds','minutes','hours', 'days', 'months', 'years' so know units
@@ -616,8 +616,9 @@ def getModelTimes(modelFile, timeVarName
     times = []
 
     for xtime in xtimes[:]:
-        
+
         # Cast time as an int
+        #TODO: KDW this may cause problems for data that is hourly with more than one timestep in it
         xtime = int(xtime)
 
         if units == 'minutes':
@@ -643,15 +644,22 @@ def getModelTimes(modelFile, timeVarName
             new_time = base_time + dt
 
         times.append(new_time)
-    
+
     try:
-        timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12)
+        if len(xtimes) == 1:
+            timeStepLength = 0
+        else:
+            timeStepLength = int(xtimes[1] - xtimes[0] + 1.e-12)
+            
         modelTimeStep = getModelTimeStep(units, timeStepLength)
+       
+        #if timeStepLength is zero do not normalize times as this would create an empty list for MERG (hourly) data
+        if timeStepLength != 0:
+            times = normalizeDatetimes(times, modelTimeStep) 
     except:
         raise
-    
-    times = normalizeDatetimes(times, modelTimeStep)
-    
+
+   
     return times, modelTimeStep
 
 def getModelTimeStep(units, stepSize):
@@ -665,21 +673,22 @@ def getModelTimeStep(units, stepSize):
         # 28 days through 31 days
         elif 40320 <= stepSize <= 44640:
             modelTimeStep = 'monthly'
-        # 365 days through 366 days 
+        # 365 days through 366 days
         elif 525600 <= stepSize <= 527040:
-            modelTimeStep = 'annual' 
+            modelTimeStep = 'annual'
         else:
             raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
 
     elif units == 'hours':
-        if stepSize == 1:
+        #need a check for fractional hrs and only one hr i.e. stepSize=0 e.g. with MERG data
+        if stepSize == 0 or stepSize == 1:
             modelTimeStep = 'hourly'
         elif stepSize == 24:
             modelTimeStep = 'daily'
         elif 672 <= stepSize <= 744:
-            modelTimeStep = 'monthly' 
+            modelTimeStep = 'monthly'
         elif 8760 <= stepSize <= 8784:
-            modelTimeStep = 'annual' 
+            modelTimeStep = 'annual'
         else:
             raise Exception('model data time step interval exceeds the max time interval (annual)', units, stepSize)
 
@@ -720,7 +729,7 @@ def decodeTimeFromString(time_string):
      ::
 
        **Input:**  time_string - a string that represents a date/time
-       
+
        **Output:** mytime - a python datetime object
     '''
 
@@ -779,117 +788,124 @@ def decodeTimeFromString(time_string):
         pass
 
     try:
-        mytime = time.strptime(time_string, '%Y-%m-%d')
+        mytime = time.strptime(time_string, '%Y-%m-%d %H')
         mytime = datetime.datetime(*mytime[0:6])
         return mytime
 
     except ValueError:
         pass
 
+    try:
+        mytime = time.strptime(time_string, '%Y-%m-%d')
+        mytime = datetime.datetime(*mytime[0:6])
+        return mytime
+
+    except ValueError:
+        pass
 
     print 'Error decoding time string: string does not match a predefined time format'
     return 0
 
 def regrid_wrapper(regrid_choice, obsdata, obslats, obslons, wrfdata, wrflats, wrflons):
-   '''
-    Wrapper routine for regridding.
-    
-    Either regrids model to obs grid, or obs to model grid, depending on user choice
-    
-        Inputs::
-            regrid_choice - [0] = Regrid obs data onto model grid or 
-                            [1] = Regrid model data onto obs grid
-
-            obsdata,wrfdata - data arrays
-            obslats,obslons - observation lat,lon arrays
-            wrflats,wrflons - model lat,lon arrays
-   
-        Output::
-            rdata,rdata2 - regridded data
-   	        lats,lons    - latitudes and longitudes of regridded data
-   
-   '''
+    '''
+     Wrapper routine for regridding.
+
+     Either regrids model to obs grid, or obs to model grid, depending on user choice
+
+         Inputs::
+             regrid_choice - [0] = Regrid obs data onto model grid or
+                             [1] = Regrid model data onto obs grid
+
+             obsdata,wrfdata - data arrays
+             obslats,obslons - observation lat,lon arrays
+             wrflats,wrflons - model lat,lon arrays
 
-   # Regrid obs data onto model grid
-   if(regrid_choice == '0'):
+         Output::
+             rdata,rdata2 - regridded data
+                 lats,lons    - latitudes and longitudes of regridded data
 
-      ndims = len(obsdata.shape)
-      if(ndims == 3):
-         newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]]
-	 nT = obsdata.shape[0]
+    '''
+
+    # Regrid obs data onto model grid
+    if(regrid_choice == '0'):
+
+        ndims = len(obsdata.shape)
+        if(ndims == 3):
+            newshape = [obsdata.shape[0], wrfdata.shape[1], wrfdata.shape[2]]
+            nT = obsdata.shape[0]
+
+        if(ndims == 2):
+            newshape = [wrfdata.shape[0], wrfdata.shape[1]]
+            nT = 0
+
+        rdata = wrfdata
+        lats, lons = wrflats, wrflons
 
-      if(ndims == 2):
-         newshape = [wrfdata.shape[0], wrfdata.shape[1]]
-	 nT = 0
+        # Create a new masked array with the required dimensions
+        tmp = np.zeros(newshape)
+        rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0))
+        tmp = 0 # free up some memory
 
-      rdata = wrfdata
-      lats, lons = wrflats, wrflons
+        rdata2[:] = 0.0
 
-      # Create a new masked array with the required dimensions
-      tmp = np.zeros(newshape)
-      rdata2 = ma.MaskedArray(tmp, mask=(tmp == 0))
-      tmp = 0 # free up some memory
+        if(nT > 0):
+            for t in np.arange(nT):
+                rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
 
-      rdata2[:] = 0.0
+        if(nT == 0):
+            rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
 
-      if(nT > 0):
-       for t in np.arange(nT):
-         rdata2[t, :, :] = do_regrid(obsdata[t, :, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
 
-      if(nT == 0):
-         rdata2[:, :] = do_regrid(obsdata[:, :], obslats[:, :], obslons[:, :], wrflats[:, :], wrflons[:, :])
+    # Regrid model data onto obs grid
+    if(regrid_choice == '1'):
+        ndims = len(wrfdata.shape)
+        if(ndims == 3):
+            newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]]
+            nT = obsdata.shape[0]
 
+        if(ndims == 2):
+            newshape = [obsdata.shape[0], obsdata.shape[1]]
+            nT = 0
 
-   # Regrid model data onto obs grid
-   if(regrid_choice == '1'):
-      ndims = len(wrfdata.shape)
-      if(ndims == 3):
-         newshape = [wrfdata.shape[0], obsdata.shape[1], obsdata.shape[2]]
-	 nT = obsdata.shape[0]
+        rdata2 = obsdata
+        lats, lons = obslats, obslons
 
-      if(ndims == 2):
-         newshape = [obsdata.shape[0], obsdata.shape[1]]
-	 nT = 0
+        tmp = np.zeros(newshape)
+        rdata = ma.MaskedArray(tmp, mask=(tmp == 0))
+        tmp = 0 # free up some memory
 
-      rdata2 = obsdata
-      lats, lons = obslats, obslons
+        rdata[:] = 0.0
 
-      tmp = np.zeros(newshape)
-      rdata = ma.MaskedArray(tmp, mask=(tmp == 0))
-      tmp = 0 # free up some memory
+        if(nT > 0):
+            for t in np.arange(nT):
+                rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
 
-      rdata[:] = 0.0
- 
-      if(nT > 0):
-        for t in np.arange(nT):
-	 rdata[t, :, :] = do_regrid(wrfdata[t, :, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
+        if(nT == 0):
+            rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
 
-      if(nT == 0):
-	rdata[:, :] = do_regrid(wrfdata[:, :], wrflats[:, :], wrflons[:, :], obslats[:, :], obslons[:, :])
 
-  
-   return rdata, rdata2, lats, lons
+    return rdata, rdata2, lats, lons
 
 def extract_sub_time_selection(allTimes, subTimes, data):
     '''
      Routine to extract a sub-selection of times from a data array.
 
      Example::
-           Suppose a data array has 30 time values for daily data for a whole month, 
+           Suppose a data array has 30 time values for daily data for a whole month,
            but you only want the data from the 5th-15th of the month.
-    
+
      Input::
            allTimes - list of datetimes describing what times the data in the data array correspond to
            subTimes - the times you want to extract data from
            data     - the data array
-    
+
      Output::
            subdata     - subselection of data array
-    
+
     '''
     # Create new array to store the subselection
     subdata = np.zeros([len(subTimes), data.shape[1], data.shape[2]])
-    
+
     # Loop over all Times and when it is a member of the required subselection, copy the data across
     i = 0     # counter for allTimes
     subi = 0  # counter for subTimes
@@ -898,23 +914,23 @@ def extract_sub_time_selection(allTimes,
             subdata[subi, :, :] = data[i, :, :]
             subi += 1
         i += 1
-    
+
     return subdata
 
 def calc_average_on_new_time_unit_K(data, dateList, unit):
     '''
     Routine to calculate averages on longer time units than the data exists on.
     e.g. if the data is 6-hourly, calculate daily, or monthly means.
-     
+
     Input:
         data     - data values
         dateList - list of python datetime structures corresponding to data values
-        unit     - string describing time unit to average onto 
+        unit     - string describing time unit to average onto
                       e.g. 'monthly', 'daily', 'pentad','annual','decadal'
     Output:
         meanstorem - numpy masked array of data values meaned over required time period
         newTimesList - a list of python datetime objects representing the data in the new averagin period
-                           NB. currently set to beginning of averaging period, 
+                           NB. currently set to beginning of averaging period,
                            i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
     '''
 
@@ -926,7 +942,7 @@ def calc_average_on_new_time_unit_K(data
 
     # Calculate arrays of: annual timeseries: year (2007,2007),
     #                      monthly time series: year-month (200701,200702),
-    #                      daily timeseries:  year-month-day (20070101,20070102) 
+    #                      daily timeseries:  year-month-day (20070101,20070102)
     #  depending on user-selected averaging period.
 
     # Year list
@@ -935,7 +951,7 @@ def calc_average_on_new_time_unit_K(data
         for i in np.arange(len(dateList)):
             timeunits.append(str(dateList[i].year))
         timeunits = np.array(timeunits, dtype=int)
-         
+
     # YearMonth format list
     if unit=='monthly':
         timeunits = []
@@ -964,7 +980,7 @@ def calc_average_on_new_time_unit_K(data
     newTimesList = []
 
     # Decide whether or not you need to do any time averaging.
-    #   i.e. if data are already on required time unit then just pass data through and 
+    #   i.e. if data are already on required time unit then just pass data through and
     #        calculate and return representative datetimes.
     processing_required = True
     if len(timeunits)==(len(np.unique(timeunits))):
@@ -974,14 +990,14 @@ def calc_average_on_new_time_unit_K(data
     if data.ndim==1:
         # Create array to store the resulting data
         meanstore = np.zeros(len(np.unique(timeunits)))
-  
+
         # Calculate the means across each unique time unit
         i=0
         for myunit in np.unique(timeunits):
             if processing_required:
                 datam=ma.masked_array(data,timeunits!=myunit)
                 meanstore[i] = ma.average(datam)
-            
+
             # construct new times list
             smyunit = str(myunit)
             if len(smyunit)==4:  # YYYY
@@ -1012,7 +1028,7 @@ def calc_average_on_new_time_unit_K(data
         # datamask = create_mask_using_threshold(data,threshold=0.75)
         # Create array to store the resulting data
         meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-  
+
         # Calculate the means across each unique time unit
         i=0
         datamask_store = []
@@ -1119,11 +1135,11 @@ def decode_model_timesK(ifile,timeVarNam
     times=[]
     for ttmp in xtimes[:]:
         xtime = int(ttmp)
-        if(units=='minutes'):  
+        if(units=='minutes'):
             dt = datetime.timedelta(minutes=xtime); new_time = base_time + dt
-        if(units=='hours'):  
+        if(units=='hours'):
             dt = datetime.timedelta(hours=xtime); new_time = base_time + dt
-        if(units=='days'):  
+        if(units=='days'):
             dt = datetime.timedelta(days=xtime); new_time = base_time + dt
         if(units=='months'):   # NB. adding months in python is complicated as month length varies and hence ambigous.
             # Perform date arithmatic manually
@@ -1141,139 +1157,139 @@ def decode_model_timesK(ifile,timeVarNam
 
 
 def regrid_in_time(data,dateList,unit):
-   #################################################################################################
-   # Routine to calculate averages on longer time units than the data exists on.
-   #  e.g. if the data is 6-hourly, calculate daily, or monthly means.
-   #  Input:
-   #         data     - data values
-   #         dateList - list of python datetime structures corresponding to data values
-   #         unit     - string describing time unit to average onto 
-   #                       e.g. 'monthly', 'daily', 'pentad','annual','decadal'
-   #  Output:
-   #         meanstorem - numpy masked array of data values meaned over required time period
-   #         newTimesList - a list of python datetime objects representing the data in the new averagin period
-   #                            NB. currently set to beginning of averaging period, 
-   #                            i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
-   # ..............................
-   #   Jinwon Kim, Sept 30, 2012
-   #   Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below:
-   #   Instead of masked array used by Peter, use wh to defined data within the averaging range.
-   #################################################################################################
-
-   print '***  Begin calc_average_on_new_time_unit_KK  ***'
-   # Check if the user-selected temporal grid is valid. If not, EXIT
-   acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
-   if not acceptable:
-     print 'Error: unknown unit type selected for time averaging: EXIT'; return -1,-1,-1,-1
-
-   # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102]) 
-   # "%02d" is similar to i2.2 in Fortran
-   if unit=='annual':                      # YYYY
-      timeunits = []
-      for i in np.arange(len(dateList)):
-         timeunits.append(str(dateList[i].year))
-   elif unit=='monthly':                   # YYYYMM
-      timeunits = []
-      for i in np.arange(len(dateList)):
-         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
-   elif unit=='daily':                     # YYYYMMDD
-      timeunits = []
-      for i in np.arange(len(dateList)):
-         timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
-   elif unit=='full':                      # Full list: a special case
-      comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
-      timeunits = []
-      for i in np.arange(len(dateList)):
-         timeunits.append(999)             # i.e. we just want the same value for all times.
-   timeunits = np.array(timeunits,dtype=int)
-   print 'timeRegridOption= ',unit#'; output timeunits= ',timeunits
-   #print 'timeRegridOption= ',unit,'; output timeunits= ',timeunits
-
-   # empty list to store time levels after temporal regridding
-   newTimesList = []
-
-   # Decide whether or not you need to do any time averaging.
-   #   i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes.
-   processing_required = True
-   if len(timeunits)==(len(np.unique(timeunits))):
+    #################################################################################################
+    # Routine to calculate averages on longer time units than the data exists on.
+    #  e.g. if the data is 6-hourly, calculate daily, or monthly means.
+    #  Input:
+    #         data     - data values
+    #         dateList - list of python datetime structures corresponding to data values
+    #         unit     - string describing time unit to average onto
+    #                       e.g. 'monthly', 'daily', 'pentad','annual','decadal'
+    #  Output:
+    #         meanstorem - numpy masked array of data values meaned over required time period
+    #         newTimesList - a list of python datetime objects representing the data in the new averaging period
+    #                            NB. currently set to beginning of averaging period,
+    #                            i.e. mean Jan 1st - Jan 31st -> represented as Jan 1st, 00Z.
+    # ..............................
+    #   Jinwon Kim, Sept 30, 2012
+    #   Created from calc_average_on_new_time_unit_K, Peter's original, with the modification below:
+    #   Instead of masked array used by Peter, use wh to define data within the averaging range.
+    #################################################################################################
+
+    print '***  Begin calc_average_on_new_time_unit_KK  ***'
+    # Check if the user-selected temporal grid is valid. If not, EXIT
+    acceptable = (unit=='full')|(unit=='annual')|(unit=='monthly')|(unit=='daily')|(unit=='pentad')
+    if not acceptable:
+        print 'Error: unknown unit type selected for time averaging: EXIT'; return -1,-1,-1,-1
+
+    # Calculate time arrays of: annual (yyyy, [2007]), monthly (yyyymm, [200701,200702]), daily (yyyymmdd, [20070101,20070102])
+    # "%02d" is similar to i2.2 in Fortran
+    if unit=='annual':                      # YYYY
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year))
+    elif unit=='monthly':                   # YYYYMM
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month))
+    elif unit=='daily':                     # YYYYMMDD
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(str(dateList[i].year) + str("%02d" % dateList[i].month) + str("%02d" % dateList[i].day))
+    elif unit=='full':                      # Full list: a special case
+        comment='Calculating means data over the entire time range: i.e., annual-mean climatology'
+        timeunits = []
+        for i in np.arange(len(dateList)):
+            timeunits.append(999)             # i.e. we just want the same value for all times.
+    timeunits = np.array(timeunits,dtype=int)
+    print 'timeRegridOption= ',unit#'; output timeunits= ',timeunits
+    #print 'timeRegridOption= ',unit,'; output timeunits= ',timeunits
+
+    # empty list to store time levels after temporal regridding
+    newTimesList = []
+
+    # Decide whether or not you need to do any time averaging.
+    #   i.e. if data are already on required time unit then just pass data through and calculate and return representative datetimes.
+    processing_required = True
+    if len(timeunits)==(len(np.unique(timeunits))):
         processing_required = False
-   print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits))
-   #print 'np.unique(timeunits): ',np.unique(timeunits)
-   print 'data.ndim= ',data.ndim
-
-   if data.ndim==1:                                           # 1D data arrays, i.e. 1D time series
-      # Create array to store the temporally regridded data
-     meanstore = np.zeros(len(np.unique(timeunits)))
-      # Calculate the means across each unique time unit
-     i=0
-     for myunit in np.unique(timeunits):
-       if processing_required:
-         wh = timeunits==myunit
-         datam = data[wh]
-         meanstore[i] = ma.average(datam)
-        # construct new times list
-       smyunit = str(myunit)
-       if len(smyunit)==4:  # YYYY
-        yyyy = int(myunit[0:4])
-        mm = 1
-        dd = 1
-       if len(smyunit)==6:  # YYYYMM
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = 1
-       if len(smyunit)==8:  # YYYYMMDD
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = int(smyunit[6:8])
-       if len(smyunit)==3:  # Full time range
-        # Need to set an appropriate time representing the mid-point of the entire time span
-        dt = dateList[-1]-dateList[0]
-        halfway = dateList[0]+(dt/2)
-        yyyy = int(halfway.year)
-        mm = int(halfway.month)
-        dd = int(halfway.day)
-       newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-       i = i+1
-
-   elif data.ndim==3:                                         # 3D data arrays, i.e. 2D time series
-      # Create array to store the resulting data
-     meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
-     # Calculate the means across each unique time unit
-     i=0
-     datamask_store = []
-     for myunit in np.unique(timeunits):
-       if processing_required:
-         wh = timeunits==myunit
-         datam = data[wh,:,:]
-         meanstore[i,:,:] = ma.average(datam,axis=0)
+    print 'processing_required= ',processing_required,': input time steps= ',len(timeunits),': regridded output time steps = ',len(np.unique(timeunits))
+    #print 'np.unique(timeunits): ',np.unique(timeunits)
+    print 'data.ndim= ',data.ndim
+
+    if data.ndim==1:                                           # 1D data arrays, i.e. 1D time series
+        # Create array to store the temporally regridded data
+        meanstore = np.zeros(len(np.unique(timeunits)))
+        # Calculate the means across each unique time unit
+        i=0
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                wh = timeunits==myunit
+                datam = data[wh]
+                meanstore[i] = ma.average(datam)
         # construct new times list
-       smyunit = str(myunit)
-       if len(smyunit)==4:  # YYYY
-        yyyy = int(smyunit[0:4])
-        mm = 1
-        dd = 1
-       if len(smyunit)==6:  # YYYYMM
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = 1
-       if len(smyunit)==8:  # YYYYMMDD
-        yyyy = int(smyunit[0:4])
-        mm = int(smyunit[4:6])
-        dd = int(smyunit[6:8])
-       if len(smyunit)==3:  # Full time range
-        # Need to set an appropriate time representing the mid-point of the entire time span
-        dt = dateList[-1]-dateList[0]
-        halfway = dateList[0]+(dt/2)
-        yyyy = int(halfway.year)
-        mm = int(halfway.month)
-        dd = int(halfway.day)
-       newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
-       i += 1
-
-     if not processing_required:
-        meanstorem = data
-     elif processing_required:
-        meanstorem = meanstore
+            smyunit = str(myunit)
+            if len(smyunit)==4:  # YYYY
+                yyyy = int(myunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit)==6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit)==8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit)==3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1]-dateList[0]
+                halfway = dateList[0]+(dt/2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            i = i+1
 
-   print '***  End calc_average_on_new_time_unit_KK  ***'
-   return meanstorem,newTimesList
+    elif data.ndim==3:                                         # 3D data arrays, i.e. 2D time series
+        # Create array to store the resulting data
+        meanstore = np.zeros([len(np.unique(timeunits)),data.shape[1],data.shape[2]])
+        # Calculate the means across each unique time unit
+        i=0
+        datamask_store = []
+        for myunit in np.unique(timeunits):
+            if processing_required:
+                wh = timeunits==myunit
+                datam = data[wh,:,:]
+                meanstore[i,:,:] = ma.average(datam,axis=0)
+                # construct new times list
+            smyunit = str(myunit)
+            if len(smyunit)==4:  # YYYY
+                yyyy = int(smyunit[0:4])
+                mm = 1
+                dd = 1
+            if len(smyunit)==6:  # YYYYMM
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = 1
+            if len(smyunit)==8:  # YYYYMMDD
+                yyyy = int(smyunit[0:4])
+                mm = int(smyunit[4:6])
+                dd = int(smyunit[6:8])
+            if len(smyunit)==3:  # Full time range
+                # Need to set an appropriate time representing the mid-point of the entire time span
+                dt = dateList[-1]-dateList[0]
+                halfway = dateList[0]+(dt/2)
+                yyyy = int(halfway.year)
+                mm = int(halfway.month)
+                dd = int(halfway.day)
+            newTimesList.append(datetime.datetime(yyyy,mm,dd,0,0,0,0))
+            i += 1
+
+        if not processing_required:
+            meanstorem = data
+        elif processing_required:
+            meanstorem = meanstore
+
+    print '***  End calc_average_on_new_time_unit_KK  ***'
+    return meanstorem,newTimesList



Mime
View raw message