climate-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lewi...@apache.org
Subject [1/3] climate git commit: CLIMATE-316 Add ESGF Download Script to repository
Date Tue, 13 Mar 2018 17:13:59 GMT
Repository: climate
Updated Branches:
  refs/heads/master 513dcc438 -> 5058b3898


CLIMATE-316 Add ESGF Download Script to repository


Project: http://git-wip-us.apache.org/repos/asf/climate/repo
Commit: http://git-wip-us.apache.org/repos/asf/climate/commit/48a18fc6
Tree: http://git-wip-us.apache.org/repos/asf/climate/tree/48a18fc6
Diff: http://git-wip-us.apache.org/repos/asf/climate/diff/48a18fc6

Branch: refs/heads/master
Commit: 48a18fc6dd9719035a0e2d20d662bf1804bce3c9
Parents: e8d8d42
Author: michaelarthuranderson <michael.arthur.anderson@gmail.com>
Authored: Sun Feb 25 15:20:12 2018 -0500
Committer: michaelarthuranderson <michael.arthur.anderson@gmail.com>
Committed: Sun Feb 25 15:20:12 2018 -0500

----------------------------------------------------------------------
 examples/esgf_integration_example.py | 58 +++++++++++--------
 ocw/esgf/constants.py                |  2 +-
 ocw/esgf/download.py                 | 53 ++++++++++-------
 ocw/esgf/logon.py                    | 16 +++---
 ocw/esgf/main.py                     | 96 ++++++++++++++++---------------
 ocw/esgf/search.py                   | 22 ++++---
 6 files changed, 138 insertions(+), 109 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/examples/esgf_integration_example.py
----------------------------------------------------------------------
diff --git a/examples/esgf_integration_example.py b/examples/esgf_integration_example.py
index e939927..e541273 100644
--- a/examples/esgf_integration_example.py
+++ b/examples/esgf_integration_example.py
@@ -30,36 +30,46 @@
 
 """
 
-import ocw.data_source.esgf as esgf
-from getpass import getpass
+from __future__ import print_function
+
 import ssl
 import sys
+from getpass import getpass
+
+import ocw.data_source.esgf as esgf
+
+
+def main():
+    """
+    An example of using the OCW ESGF library.  Connects to an ESGF
+    server and downloads a dataset.
+    """
+    if hasattr(ssl, '_create_unverified_context'):
+        ssl._create_default_https_context = ssl._create_unverified_context
+
+    dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
+    variable = 'zosStderr'
 
-if hasattr(ssl, '_create_unverified_context'):
-    ssl._create_default_https_context = ssl._create_unverified_context
+    if sys.version_info[0] >= 3:
+        username = input('Enter your ESGF OpenID:\n')
+    else:
+        username = raw_input('Enter your ESGF OpenID:\n')
 
-dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
-variable = 'zosStderr'
+    password = getpass(prompt='Enter your ESGF Password:\n')
 
-if sys.version_info[0] >= 3:
-    username = input('Enter your ESGF OpenID:\n')
-else:
-    username = raw_input('Enter your ESGF OpenID:\n')
+    # Multiple datasets are returned in a list if the ESGF dataset is
+    # divided into multiple files.
+    datasets = esgf.load_dataset(dataset_id, variable, username, password)
 
-password = getpass(prompt='Enter your ESGF Password:\n')
+    # For this example, our dataset is only stored in a single file so
+    # we only need to look at the 0-th value in the returned list.
+    dataset = datasets[0]
 
-# Multiple datasets are returned in a list if the ESGF dataset is
-# divided into multiple files.
-datasets = esgf.load_dataset(dataset_id,
-                             variable,
-                             username,
-                             password)
+    print('\n--------\n')
+    print('Variable: ', dataset.variable)
+    print('Shape: ', dataset.values.shape)
+    print('A Value: ', dataset.values[100][100][100])
 
-# For this example, our dataset is only stored in a single file so
-# we only need to look at the 0-th value in the returned list.
-ds = datasets[0]
 
-print('\n--------\n')
-print('Variable: ', ds.variable)
-print('Shape: ', ds.values.shape)
-print('A Value: ', ds.values[100][100][100])
+if __name__ == '__main__':
+    main()

http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/constants.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/constants.py b/ocw/esgf/constants.py
index 8d30848..90218fd 100644
--- a/ocw/esgf/constants.py
+++ b/ocw/esgf/constants.py
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-'''Module containing constant parameters for ESGF RCMES integration.'''
+"""Module containing constant parameters for ESGF RCMES integration."""
 
 # default location of ESGF user credentials
 ESGF_CREDENTIALS = "~/.esg/credentials.pem"

http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/download.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/download.py b/ocw/esgf/download.py
index 690915c..951a341 100644
--- a/ocw/esgf/download.py
+++ b/ocw/esgf/download.py
@@ -16,12 +16,18 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-'''
+"""
 OCW module to download a file from ESGF.
 
-'''
+"""
+
+from __future__ import print_function
 
 import sys
+from os.path import expanduser, join
+
+from ocw.esgf.constants import ESGF_CREDENTIALS
+
 if sys.version_info[0] >= 3:
     from http.client import HTTPSConnection
     from urllib.request import build_opener
@@ -35,15 +41,12 @@ else:
     from urllib2 import build_opener
     from urllib2 import HTTPCookieProcessor
     from urllib2 import HTTPSHandler
-from os.path import expanduser, join
-
-from ocw.esgf.constants import ESGF_CREDENTIALS
 
 
 class HTTPSClientAuthHandler(HTTPSHandler):
-    '''
+    """
     HTTP handler that transmits an X509 certificate as part of the request
-    '''
+    """
 
     def __init__(self, key, cert):
         HTTPSHandler.__init__(self)
@@ -51,34 +54,44 @@ class HTTPSClientAuthHandler(HTTPSHandler):
         self.cert = cert
 
     def https_open(self, req):
+        """
+        Opens the https connection.
+        :param req:  The https request object.
+        :return: An addinfourl object for the request.
+        """
         return self.do_open(self.getConnection, req)
 
     def getConnection(self, host, timeout=300):
-        return HTTPSConnection(host, key_file=self.key, cert_file=self.cert)
+        """
+        Create an HTTPSConnection object.
+        :param host: The ESGF server to connect to.
+        :param timeout: Connection timeout in seconds.
+        :return:
+        """
+        return HTTPSConnection(host, key_file=self.key, cert_file=self.cert, timeout=timeout)
 
 
 def download(url, toDirectory="/tmp"):
-    '''
+    """
     Function to download a single file from ESGF.
-
     :param url: the URL of the file to download
     :param toDirectory: target directory where the file will be written
-    '''
+    """
 
     # setup HTTP handler
-    certFile = expanduser(ESGF_CREDENTIALS)
-    opener = build_opener(HTTPSClientAuthHandler(certFile, certFile))
+    cert_file = expanduser(ESGF_CREDENTIALS)
+    opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
     opener.add_handler(HTTPCookieProcessor())
 
     # download file
-    localFilePath = join(toDirectory, url.split('/')[-1])
-    print("\nDownloading url: %s to local path: %s ..." % (url, localFilePath))
-    localFile = open(localFilePath, 'w')
-    webFile = opener.open(url)
-    localFile.write(webFile.read())
+    local_file_path = join(toDirectory, url.split('/')[-1])
+    print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
+    local_file = open(local_file_path, 'w')
+    web_file = opener.open(url)
+    local_file.write(web_file.read())
 
     # cleanup
-    localFile.close()
-    webFile.close()
+    local_file.close()
+    web_file.close()
     opener.close()
     print("... done")

http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/logon.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/logon.py b/ocw/esgf/logon.py
index b792cfa..a49335d 100644
--- a/ocw/esgf/logon.py
+++ b/ocw/esgf/logon.py
@@ -16,28 +16,28 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-'''
+"""
 RCMES module to logon onto the ESGF.
-'''
+"""
 import os
 
 from pyesgf.logon import LogonManager
 
-from ocw.esgf.constants import JPL_MYPROXY_SERVER_DN, JPL_HOSTNAME
+from ocw.esgf.constants import JPL_HOSTNAME, JPL_MYPROXY_SERVER_DN
 
 
 def logon(openid, password):
-    '''
+    """
     Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF.
     The certificate is written in the location ~/.esg/credentials.pem.
     The trusted CA certificates are written in the directory ~/.esg/certificates.
-    '''
+    """
     # Must configure the DN of the JPL MyProxy server if using a JPL openid
     if JPL_HOSTNAME in openid:
         os.environ['MYPROXY_SERVER_DN'] = JPL_MYPROXY_SERVER_DN
 
-    lm = LogonManager()
+    logon_manager = LogonManager()
 
-    lm.logon_with_openid(openid, password, bootstrap=True)
+    logon_manager.logon_with_openid(openid, password, bootstrap=True)
 
-    return lm.is_logged_on()
+    return logon_manager.is_logged_on()

http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/main.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/main.py b/ocw/esgf/main.py
index 5c90042..0fb4656 100644
--- a/ocw/esgf/main.py
+++ b/ocw/esgf/main.py
@@ -16,21 +16,23 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-'''
+"""
 Example main program for ESGF-RCMES integration.
-    
-'''
 
-# constant parameters
-DATA_DIRECTORY = "/tmp"
+"""
+
+from __future__ import print_function
 
+from ocw.esgf.download import download
 from ocw.esgf.logon import logon
 from ocw.esgf.search import SearchClient
-from ocw.esgf.download import download
+
+# constant parameters
+DATA_DIRECTORY = "/tmp"
 
 
 def main():
-    '''Example driver program'''
+    """Example driver program"""
 
     username = raw_input('Enter your ESGF Username:\n')
     password = raw_input('Enter your ESGF Password:\n')
@@ -42,8 +44,8 @@ def main():
         print("...done.")
 
     # step 2: execute faceted search for files
-    urls = main_obs4mips()
-    #urls = main_cmip5()
+    # urls = main_obs4mips()
+    urls = main_cmip5()
 
     # step 3: download file(s)
     for i, url in enumerate(urls):
@@ -53,66 +55,66 @@ def main():
 
 
 def main_cmip5():
-    '''
+    """
     Example workflow to search for CMIP5 files
-    '''
+    """
 
-    searchClient = SearchClient(
+    search_client = SearchClient(
         searchServiceUrl="http://pcmdi9.llnl.gov/esg-search/search", distrib=False)
 
-    print('\nAvailable projects=%s' % searchClient.getFacets('project'))
-    searchClient.setConstraint(project='CMIP5')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable projects=%s' % search_client.getFacets('project'))
+    search_client.setConstraint(project='CMIP5')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
+
+    print('\nAvailable models=%s' % search_client.getFacets('model'))
+    search_client.setConstraint(model='INM-CM4')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable models=%s' % searchClient.getFacets('model'))
-    searchClient.setConstraint(model='INM-CM4')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable experiments=%s' % search_client.getFacets('experiment'))
+    search_client.setConstraint(experiment='historical')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable experiments=%s' % searchClient.getFacets('experiment'))
-    searchClient.setConstraint(experiment='historical')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
+    search_client.setConstraint(time_frequency='mon')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable time frequencies=%s' %
-          searchClient.getFacets('time_frequency'))
-    searchClient.setConstraint(time_frequency='mon')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable CF standard names=%s' % search_client.getFacets('cf_standard_name'))
+    search_client.setConstraint(cf_standard_name='air_temperature')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable CF standard names=%s' %
-          searchClient.getFacets('cf_standard_name'))
-    searchClient.setConstraint(cf_standard_name='air_temperature')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    urls = search_client.getFiles()
 
-    urls = searchClient.getFiles()
     return urls
 
 
 def main_obs4mips():
-    '''
+    """
     Example workflow to search for obs4MIPs files.
-    '''
+    """
 
-    searchClient = SearchClient(distrib=False)
+    search_client = SearchClient(distrib=False)
 
     # obs4MIPs
-    print('\nAvailable projects=%s' % searchClient.getFacets('project'))
-    searchClient.setConstraint(project='obs4MIPs')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable projects=%s' % search_client.getFacets('project'))
+    search_client.setConstraint(project='obs4MIPs')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable variables=%s' % searchClient.getFacets('variable'))
-    searchClient.setConstraint(variable='hus')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable variables=%s' % search_client.getFacets('variable'))
+    search_client.setConstraint(variable='hus')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable time frequencies=%s' %
-          searchClient.getFacets('time_frequency'))
-    searchClient.setConstraint(time_frequency='mon')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
+    print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
+    search_client.setConstraint(time_frequency='mon')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
 
-    print('\nAvailable models=%s' % searchClient.getFacets('model'))
-    searchClient.setConstraint(model='Obs-MLS')
-    print("Number of Datasets=%d" % searchClient.getNumberOfDatasetsi())
+    print('\nAvailable models=%s' % search_client.getFacets('model'))
+    search_client.setConstraint(model='Obs-MLS')
+    print("Number of Datasets=%d" % search_client.getNumberOfDatasets())
+
+    urls = search_client.getFiles()
 
-    urls = searchClient.getFiles()
     return urls
 
+
 if __name__ == '__main__':
     main()

http://git-wip-us.apache.org/repos/asf/climate/blob/48a18fc6/ocw/esgf/search.py
----------------------------------------------------------------------
diff --git a/ocw/esgf/search.py b/ocw/esgf/search.py
index c2f4e12..a807c42 100644
--- a/ocw/esgf/search.py
+++ b/ocw/esgf/search.py
@@ -16,17 +16,19 @@
 # specific language governing permissions and limitations
 # under the License.
 #
-'''
+"""
 RCMES module to execute a faceted search for ESGF files.
 
-'''
+"""
+
+from __future__ import print_function
 
 from pyesgf.search import SearchConnection
 
 from ocw.esgf.constants import JPL_SEARCH_SERVICE_URL
 
 
-class SearchClient():
+class SearchClient(object):
     """
     Simple ESGF search client for RCMES.
     This class is a thin layer on top of the esgfpy-client package.
@@ -36,7 +38,7 @@ class SearchClient():
     def __init__(self, searchServiceUrl=JPL_SEARCH_SERVICE_URL, distrib=True):
         """
         :param searchServiceUrl: URL of ESGF search service to query
-        :param distrib: True to execute a federation-wide search, 
+        :param distrib: True to execute a federation-wide search,
                         False to search only the specified search service
         """
         connection = SearchConnection(searchServiceUrl, distrib=distrib)
@@ -66,8 +68,10 @@ class SearchClient():
 
     def getFacets(self, facet):
         """
-        :return: a dictionary of (facet value, facet count) for the specified facet and current constraints.
-        Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7} 
+        :return: a dictionary of (facet value, facet count) for the specified facet
+        and current constraints.
+
+        Example (for facet='project'): {u'COUND': 4, u'CMIP5': 2657, u'obs4MIPs': 7}
         """
         return self.context.facet_counts[facet]
 
@@ -82,7 +86,7 @@ class SearchClient():
             print("\nSearching files for dataset=%s with constraints: %s" %
                   (dataset.dataset_id, self.constraints))
             files = dataset.file_context().search(**self.constraints)
-            for file in files:
-                print('Found file=%s' % file.download_url)
-                urls.append(file.download_url)
+            for current_file in files:
+                print('Found file=%s' % current_file.download_url)
+                urls.append(current_file.download_url)
         return urls


Mime
View raw message