climate-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From omk...@apache.org
Subject [14/16] climate git commit: CLIMATE-769 Adding PO.DAAC data source
Date Sat, 27 Aug 2016 03:42:36 GMT
CLIMATE-769 Adding PO.DAAC data source


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/0b2b21a5
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/0b2b21a5
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/0b2b21a5

Branch: refs/heads/master
Commit: 0b2b21a54636ecb95ad75e24a83dd4e49d0c8a2c
Parents: e12314a
Author: Omkar20895 <omkarreddy2008@gmail.com>
Authored: Sat Aug 27 00:03:49 2016 +0530
Committer: Omkar20895 <omkarreddy2008@gmail.com>
Committed: Sat Aug 27 00:03:49 2016 +0530

----------------------------------------------------------------------
 docs/source/data_source/data_sources.rst |   5 ++
 easy-ocw/ocw-pip-dependencies.txt        |   1 +
 examples/podaac_integration_example.py   |  75 +++++++++++++++++
 ocw/data_source/podaac.py                | 113 --------------------------
 ocw/data_source/podaac_datasource.py     | 111 +++++++++++++++++++++++++
 ocw/dataset_loader.py                    |   6 +-
 ocw/tests/test_podaac.py                 |  10 +--
 7 files changed, 202 insertions(+), 119 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/docs/source/data_source/data_sources.rst
----------------------------------------------------------------------
diff --git a/docs/source/data_source/data_sources.rst b/docs/source/data_source/data_sources.rst
index 19f9293..4bb4f16 100644
--- a/docs/source/data_source/data_sources.rst
+++ b/docs/source/data_source/data_sources.rst
@@ -20,3 +20,8 @@ ESGF Module
 ===========
 .. automodule:: esgf
     :members:
+
+PODAAC Module
+=============
+.. automodule:: podaac
+    :members:

http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/easy-ocw/ocw-pip-dependencies.txt
----------------------------------------------------------------------
diff --git a/easy-ocw/ocw-pip-dependencies.txt b/easy-ocw/ocw-pip-dependencies.txt
index bd609dd..867c801 100644
--- a/easy-ocw/ocw-pip-dependencies.txt
+++ b/easy-ocw/ocw-pip-dependencies.txt
@@ -9,3 +9,4 @@ esgf-pyclient>=0.1.6
 python-dateutil>=2.5.3
 mock>=2.0.0
 myproxyclient>=1.4.3
+podaacpy>=1.0.2

http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/examples/podaac_integration_example.py
----------------------------------------------------------------------
diff --git a/examples/podaac_integration_example.py b/examples/podaac_integration_example.py
new file mode 100644
index 0000000..da90373
--- /dev/null
+++ b/examples/podaac_integration_example.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import ocw.data_source.podaac_datasource as podaac
+import ocw.evaluation as evaluation
+import ocw.metrics as metrics
+import ocw.plotter as plotter
+
+datasetId = 'PODAAC-CCF30-01XXX'
+variable = 'uwnd'
+name = 'PO.DAAC_test_dataset'
+OUTPUT_PLOT = "ccmp_temporal_std"
+""" Step 1: Load Local NetCDF Files into OCW Dataset Objects """
+print("Loading %s dataset into an OCW dataset objects" % datasetId)
+ccmp_dataset = podaac.load_dataset(
+    variable=variable, datasetId=datasetId, name=name)
+print("CCMP_Dataset.values shape: (times, lats, lons) - %s \n" %
+      (ccmp_dataset.values.shape,))
+
+# Accessing latitudes and longitudes of the netCDF file
+lats = ccmp_dataset.lats
+lons = ccmp_dataset.lons
+
+""" Step 2:  Build a Metric to use for Evaluation - Temporal STD for this example """
+# You can build your own metrics, but OCW also ships with some common metrics
+print("Setting up a Temporal STD metric to use for evaluation")
+std = metrics.TemporalStdDev()
+
+""" Step 3: Create an Evaluation Object using Datasets and our Metric """
+# The Evaluation Class Signature is:
+# Evaluation(reference, targets, metrics, subregions=None)
+# Evaluation can take in multiple targets and metrics, so we need to convert
+# our examples into Python lists.  Evaluation will iterate over the lists
+print("Making the Evaluation definition")
+# Temporal STD Metric gets one target dataset then reference dataset
+# should be None
+std_evaluation = evaluation.Evaluation(None, [ccmp_dataset], [std])
+print("Executing the Evaluation using the object's run() method")
+std_evaluation.run()
+
+""" Step 4: Make a Plot from the Evaluation.results """
+# The Evaluation.results are a set of nested lists to support many different
+# possible Evaluation scenarios.
+#
+# The Evaluation results docs say:
+# The shape of results is (num_metrics, num_target_datasets) if no subregion
+# Accessing the actual results when we have used 1 metric and 1 dataset is
+# done this way:
+print("Accessing the Results of the Evaluation run")
+results = std_evaluation.unary_results[0][0]
+print("The results are of type: %s" % type(results))
+print("Generating a contour map using ocw.plotter.draw_contour_map()")
+
+fname = OUTPUT_PLOT
+gridshape = (4, 5)  # 20 plots arranged in 4 rows and 5 columns
+plot_title = "CCMP Temporal Standard Deviation"
+sub_titles = range(2002, 2010, 1)
+
+plotter.draw_contour_map(results, lats, lons, fname,
+                         gridshape=gridshape, ptitle=plot_title,
+                         subtitles=sub_titles)

http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/ocw/data_source/podaac.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/podaac.py b/ocw/data_source/podaac.py
deleted file mode 100644
index 47a5409..0000000
--- a/ocw/data_source/podaac.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from podaac_data_source import Podaac
-import numpy as np
-from ocw.dataset import Dataset
-from netCDF4 import Dataset as netcdf_dataset
-from netcdftime import utime
-import os
-import urllib
-import xml.etree.ElementTree as ET
-
-
-def _convert_times_to_datetime(time):
-    '''Convert the time object's values to datetime objects
-
-    The time values are stored as some unit since an epoch. These need to be
-    converted into datetime objects for the OCW Dataset object.
-
-    :param time: The time object's values to convert
-    :type time: pydap.model.BaseType
-
-    :returns: list of converted time values as datetime objects
-    '''
-    units = time.units
-    # parse the time units string into a useful object.
-    # NOTE: This assumes a 'standard' calendar. It's possible (likely?) that
-    # users will want to customize this in the future.
-    parsed_time = utime(units)
-    return [parsed_time.num2date(x) for x in time[:]]
-
-
-def load_dataset(variable, datasetId='', datasetShortName='', name=''):
-    '''Loads a Dataset from PODAAC
-
-    :param variable: The name of the variable to read from the dataset.
-    :type variable: :mod:`string`
-
-        :param datasetId: dataset persistent ID. datasetId or \
-        shortName is required for a granule search. Example: \
-        PODAAC-ASOP2-25X01
-    :type datasetId: :mod:`string` 
-
-    :param shortName: the shorter name for a dataset. \
-        Either shortName or datasetId is required for a \
-        granule search. Example: ASCATA-L2-25km
-    :type shortName: :mod:`string`
-
-    :param name: (Optional) A name for the loaded dataset.
-    :type name: :mod:`string`
-
-    :returns: A :class:`dataset.Dataset` containing the dataset pointed to by
-        the OpenDAP URL.
-
-    :raises: ServerError
-    '''
-    # Downloading the dataset using podaac toolkit
-        podaac = Podaac()
-        path = os.path.dirname(os.path.abspath(__file__))
-        granuleName = podaac.extract_l4_granule(
-            datasetId=datasetId, shortName=datasetShortName, path=path)
-        path = path + '/' + granuleName
-        d = netcdf_dataset(path, mode='r')
-        dataset = d.variables[variable]
-
-    # By convention, but not by standard, if the dimensions exist, they will be in the order:
-    # time (t), altitude (z), latitude (y), longitude (x)
-    # but conventions aren't always followed and all dimensions aren't always present so
-    # see if we can make some educated deductions before defaulting to just pulling the first three
-    # columns.
-        temp_dimensions = map(lambda x: x.lower(), dataset.dimensions)
-        dataset_dimensions = dataset.dimensions
-        time = dataset_dimensions[temp_dimensions.index(
-            'time') if 'time' in temp_dimensions else 0]
-        lat = dataset_dimensions[temp_dimensions.index(
-            'lat') if 'lat' in temp_dimensions else 1]
-        lon = dataset_dimensions[temp_dimensions.index(
-            'lon') if 'lon' in temp_dimensions else 2]
-
-    # Time is given to us in some units since an epoch. We need to convert
-    # these values to datetime objects. Note that we use the main object's
-    # time object and not the dataset specific reference to it. We need to
-    # grab the 'units' from it and it fails on the dataset specific object.
-        times = np.array(_convert_times_to_datetime(d[time]))
-        lats = np.array(d.variables[lat][:])
-        lons = np.array(d.variables[lon][:])
-        values = np.array(dataset[:])
-        origin = {
-            'source': 'PO.DAAC',
-            'url': 'podaac.jpl.nasa.gov/ws'
-        }
-
-    # Removing the downloaded temporary granule before creating the OCW
-    # dataset.
-        d.close()
-        path = os.path.join(os.path.dirname(__file__), granuleName)
-        os.remove(path)
-
-        return Dataset(lats, lons, times, values, variable, name=name, origin=origin)

http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/ocw/data_source/podaac_datasource.py
----------------------------------------------------------------------
diff --git a/ocw/data_source/podaac_datasource.py b/ocw/data_source/podaac_datasource.py
new file mode 100644
index 0000000..4a08046
--- /dev/null
+++ b/ocw/data_source/podaac_datasource.py
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from podaac.podaac import Podaac
+import numpy as np
+from ocw.dataset import Dataset
+from netCDF4 import Dataset as netcdf_dataset
+from netcdftime import utime
+import os
+
+
+def convert_times_to_datetime(time):
+    '''Convert the time object's values to datetime objects
+
+    The time values are stored as some unit since an epoch. These need to be
+    converted into datetime objects for the OCW Dataset object.
+
+    :param time: The time object's values to convert
+    :type time: pydap.model.BaseType
+
+    :returns: list of converted time values as datetime objects
+    '''
+    units = time.units
+    # parse the time units string into a useful object.
+    # NOTE: This assumes a 'standard' calendar. It's possible (likely?) that
+    # users will want to customize this in the future.
+    parsed_time = utime(units)
+    return [parsed_time.num2date(x) for x in time[:]]
+
+
+def load_dataset(variable, datasetId='', name=''):
+    '''Loads a Dataset from PODAAC
+
+    :param variable: The name of the variable to read from the dataset.
+    :type variable: :mod:`string`
+
+    :param datasetId: dataset persistent ID, required for a granule \
+        search. Example: PODAAC-ASOP2-25X01
+    :type datasetId: :mod:`string`
+
+    :param name: (Optional) A name for the loaded dataset.
+    :type name: :mod:`string`
+
+    :returns: A :class:`dataset.Dataset` containing the dataset pointed to by
+        the OpenDAP URL.
+
+    :raises: ServerError
+    '''
+    # Downloading the dataset using podaac toolkit
+    podaac = Podaac()
+    path = os.path.dirname(os.path.abspath(__file__))
+    granuleName = podaac.extract_l4_granule(
+        dataset_id=datasetId, path=path)
+    path = path + '/' + granuleName
+    d = netcdf_dataset(path, mode='r')
+    dataset = d.variables[variable]
+
+    # By convention, but not by standard, if the dimensions exist, they will be in the order:
+    # time (t), altitude (z), latitude (y), longitude (x)
+    # but conventions aren't always followed and all dimensions aren't always present so
+    # see if we can make some educated deductions before defaulting to just pulling the first three
+    # columns.
+    temp_dimensions = map(lambda x: x.lower(), dataset.dimensions)
+    dataset_dimensions = dataset.dimensions
+    time = dataset_dimensions[temp_dimensions.index(
+        'time') if 'time' in temp_dimensions else 0]
+    lat = dataset_dimensions[temp_dimensions.index(
+        'lat') if 'lat' in temp_dimensions else 1]
+    lon = dataset_dimensions[temp_dimensions.index(
+        'lon') if 'lon' in temp_dimensions else 2]
+
+    # Time is given to us in some units since an epoch. We need to convert
+    # these values to datetime objects. Note that we use the main object's
+    # time object and not the dataset specific reference to it. We need to
+    # grab the 'units' from it and it fails on the dataset specific object.
+    times = np.array(convert_times_to_datetime(d[time]))
+    lats = np.array(d.variables[lat][:])
+    lons = np.array(d.variables[lon][:])
+    values = np.array(dataset[:])
+    origin = {
+        'source': 'PO.DAAC',
+        'url': 'podaac.jpl.nasa.gov/ws'
+    }
+
+    # Removing the downloaded temporary granule before creating the OCW
+    # dataset.
+    d.close()
+    path = os.path.join(os.path.dirname(__file__), granuleName)
+    os.remove(path)
+
+    return Dataset(lats, lons, times, values, variable, name=name, origin=origin)

http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/ocw/dataset_loader.py
----------------------------------------------------------------------
diff --git a/ocw/dataset_loader.py b/ocw/dataset_loader.py
index 7c90123..ed8fdb0 100644
--- a/ocw/dataset_loader.py
+++ b/ocw/dataset_loader.py
@@ -24,6 +24,7 @@ import ocw.data_source.local as local
 import ocw.data_source.esgf as esgf
 import ocw.data_source.rcmed as rcmed
 import ocw.data_source.dap as dap
+import ocw.data_source.podaac_datasource as podaac
 
 
 class DatasetLoader:
@@ -69,6 +70,8 @@ class DatasetLoader:
         * ``'rcmed'`` - Download the dataset from the Regional Climate Model
                         Evaluation System Database
         * ``'dap'`` - Download the dataset from an OPeNDAP URL
+        * ``'podaac'`` - Download the dataset from Physical Oceanography
+                        Distributed Active Archive Center
 
         Users who wish to load datasets from loaders not described above may
         define their own custom dataset loader function and incorporate it as
@@ -94,7 +97,8 @@ class DatasetLoader:
             'local_split': local.load_dataset_from_multiple_netcdf_files,
             'esgf': esgf.load_dataset,
             'rcmed': rcmed.parameter_dataset,
-            'dap': dap.load
+            'dap': dap.load,
+            'podaac': podaac.load_dataset
         }
 
     def add_source_loader(self, loader_name, loader_func):

http://git-wip-us.apache.org/repos/asf/climate/blob/0b2b21a5/ocw/tests/test_podaac.py
----------------------------------------------------------------------
diff --git a/ocw/tests/test_podaac.py b/ocw/tests/test_podaac.py
index f696d02..66afcb8 100644
--- a/ocw/tests/test_podaac.py
+++ b/ocw/tests/test_podaac.py
@@ -16,9 +16,10 @@
 # under the License.
 
 
-import ocw.data_source.podaac as podaac
+import ocw.data_source.podaac_datasource as podaac
 import unittest
 import os
+import datetime as dt
 from ocw.dataset import Dataset
 
 
@@ -27,13 +28,12 @@ class TestPodaacDataSource(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls.datasetId = 'PODAAC-GHCMC-4FM02'
-        cls.datasetShortName = 'CMC0.2deg-CMC-L4-GLOB-v2.0'
         cls.variable = 'sea_ice_fraction'
         cls.name = 'PO.DAAC_test_dataset'
         cls.file_path = os.path.dirname(os.path.abspath(__file__))
         cls.format = '.nc'
         cls.dataset = podaac.load_dataset(
-            cls.variable, cls.datasetId, cls.datasetShortName, cls.name)
+            cls.variable, cls.datasetId, cls.name)
 
     def test_is_dataset(self):
         self.assertTrue(isinstance(self.dataset, Dataset))
@@ -51,8 +51,8 @@ class TestPodaacDataSource(unittest.TestCase):
         self.assertEquals(len(self.dataset.values), 1)
 
     def test_valid_date_conversion(self):
-    start = dt.datetime(2006, 6, 7, 12)
-    self.assertTrue(start == self.dataset.times[0])
+        start = dt.datetime(1991, 9, 2, 12)
+        self.assertTrue(start == self.dataset.times[0])
 
     def test_dataset_origin(self):
         self.assertEquals(self.dataset.origin['source'], 'PO.DAAC')


Mime
View raw message