Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
CLIMATE-316 Add ESGF Download Script to repository
Browse files (browse the repository at this point in the history)
  • Loading branch information
MichaelArthurAnderson committed Feb 25, 2018
1 parent e8d8d42 commit 48a18fc
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 109 deletions.
58 changes: 34 additions & 24 deletions examples/esgf_integration_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,46 @@
"""

import ocw.data_source.esgf as esgf
from getpass import getpass
from __future__ import print_function

import ssl
import sys
from getpass import getpass

import ocw.data_source.esgf as esgf


def main():
"""
An example of using the OCW ESGF library. Connects to an ESGF
server and downloads a dataset.
"""
if hasattr(ssl, '_create_unverified_context'):
ssl._create_default_https_context = ssl._create_unverified_context

dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
variable = 'zosStderr'

if hasattr(ssl, '_create_unverified_context'):
ssl._create_default_https_context = ssl._create_unverified_context
if sys.version_info[0] >= 3:
username = input('Enter your ESGF OpenID:\n')
else:
username = raw_input('Enter your ESGF OpenID:\n')

dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
variable = 'zosStderr'
password = getpass(prompt='Enter your ESGF Password:\n')

if sys.version_info[0] >= 3:
username = input('Enter your ESGF OpenID:\n')
else:
username = raw_input('Enter your ESGF OpenID:\n')
# Multiple datasets are returned in a list if the ESGF dataset is
# divided into multiple files.
datasets = esgf.load_dataset(dataset_id, variable, username, password)

password = getpass(prompt='Enter your ESGF Password:\n')
# For this example, our dataset is only stored in a single file so
# we only need to look at the 0-th value in the returned list.
dataset = datasets[0]

# Multiple datasets are returned in a list if the ESGF dataset is
# divided into multiple files.
datasets = esgf.load_dataset(dataset_id,
variable,
username,
password)
print('\n--------\n')
print('Variable: ', dataset.variable)
print('Shape: ', dataset.values.shape)
print('A Value: ', dataset.values[100][100][100])

# For this example, our dataset is only stored in a single file so
# we only need to look at the 0-th value in the returned list.
ds = datasets[0]

print('\n--------\n')
print('Variable: ', ds.variable)
print('Shape: ', ds.values.shape)
print('A Value: ', ds.values[100][100][100])
if __name__ == '__main__':
main()
2 changes: 1 addition & 1 deletion ocw/esgf/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
'''Module containing constant parameters for ESGF RCMES integration.'''
"""Module containing constant parameters for ESGF RCMES integration."""

# default location of ESGF user credentials
ESGF_CREDENTIALS = "~/.esg/credentials.pem"
Expand Down
53 changes: 33 additions & 20 deletions ocw/esgf/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,18 @@
# specific language governing permissions and limitations
# under the License.
#
'''
"""
OCW module to download a file from ESGF.
'''
"""

from __future__ import print_function

import sys
from os.path import expanduser, join

from ocw.esgf.constants import ESGF_CREDENTIALS

if sys.version_info[0] >= 3:
from http.client import HTTPSConnection
from urllib.request import build_opener
Expand All @@ -35,50 +41,57 @@
from urllib2 import build_opener
from urllib2 import HTTPCookieProcessor
from urllib2 import HTTPSHandler
from os.path import expanduser, join

from ocw.esgf.constants import ESGF_CREDENTIALS


class HTTPSClientAuthHandler(HTTPSHandler):
'''
"""
HTTP handler that transmits an X509 certificate as part of the request
'''
"""

def __init__(self, key, cert):
HTTPSHandler.__init__(self)
self.key = key
self.cert = cert

def https_open(self, req):
"""
Opens the https connection.
:param req: The https request object.
:return: An addinfourl object for the request.
"""
return self.do_open(self.getConnection, req)

def getConnection(self, host, timeout=300):
return HTTPSConnection(host, key_file=self.key, cert_file=self.cert)
"""
Create an HTTPSConnection object.
:param host: The ESGF server to connect to.
:param timeout: Connection timeout in seconds.
:return:
"""
return HTTPSConnection(host, key_file=self.key, cert_file=self.cert, timeout=timeout)


def download(url, toDirectory="/tmp"):
'''
"""
Function to download a single file from ESGF.
:param url: the URL of the file to download
:param toDirectory: target directory where the file will be written
'''
"""

# setup HTTP handler
certFile = expanduser(ESGF_CREDENTIALS)
opener = build_opener(HTTPSClientAuthHandler(certFile, certFile))
cert_file = expanduser(ESGF_CREDENTIALS)
opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
opener.add_handler(HTTPCookieProcessor())

# download file
localFilePath = join(toDirectory, url.split('/')[-1])
print("\nDownloading url: %s to local path: %s ..." % (url, localFilePath))
localFile = open(localFilePath, 'w')
webFile = opener.open(url)
localFile.write(webFile.read())
local_file_path = join(toDirectory, url.split('/')[-1])
print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
local_file = open(local_file_path, 'w')
web_file = opener.open(url)
local_file.write(web_file.read())

# cleanup
localFile.close()
webFile.close()
local_file.close()
web_file.close()
opener.close()
print("... done")
16 changes: 8 additions & 8 deletions ocw/esgf/logon.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,28 +16,28 @@
# specific language governing permissions and limitations
# under the License.
#
'''
"""
RCMES module to logon onto the ESGF.
'''
"""
import os

from pyesgf.logon import LogonManager

from ocw.esgf.constants import JPL_MYPROXY_SERVER_DN, JPL_HOSTNAME
from ocw.esgf.constants import JPL_HOSTNAME, JPL_MYPROXY_SERVER_DN


def logon(openid, password):
'''
"""
Function to retrieve a short-term X.509 certificate that can be used to authenticate with ESGF.
The certificate is written in the location ~/.esg/credentials.pem.
The trusted CA certificates are written in the directory ~/.esg/certificates.
'''
"""
# Must configure the DN of the JPL MyProxy server if using a JPL openid
if JPL_HOSTNAME in openid:
os.environ['MYPROXY_SERVER_DN'] = JPL_MYPROXY_SERVER_DN

lm = LogonManager()
logon_manager = LogonManager()

lm.logon_with_openid(openid, password, bootstrap=True)
logon_manager.logon_with_openid(openid, password, bootstrap=True)

return lm.is_logged_on()
return logon_manager.is_logged_on()
96 changes: 49 additions & 47 deletions ocw/esgf/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,23 @@
# specific language governing permissions and limitations
# under the License.
#
'''
"""
Example main program for ESGF-RCMES integration.
'''
# constant parameters
DATA_DIRECTORY = "/tmp"
"""

from __future__ import print_function

from ocw.esgf.download import download
from ocw.esgf.logon import logon
from ocw.esgf.search import SearchClient
from ocw.esgf.download import download

# constant parameters
DATA_DIRECTORY = "/tmp"


def main():
'''Example driver program'''
"""Example driver program"""

username = raw_input('Enter your ESGF Username:\n')
password = raw_input('Enter your ESGF Password:\n')
Expand All @@ -42,8 +44,8 @@ def main():
print("...done.")

# step 2: execute faceted search for files
urls = main_obs4mips()
#urls = main_cmip5()
# urls = main_obs4mips()
urls = main_cmip5()

# step 3: download file(s)
for i, url in enumerate(urls):
Expand All @@ -53,66 +55,66 @@ def main():


def main_cmip5():
'''
"""
Example workflow to search for CMIP5 files
'''
"""

searchClient = SearchClient(
search_client = SearchClient(
searchServiceUrl="http://pcmdi9.llnl.gov/esg-search/search", distrib=False)

print('\nAvailable projects=%s' % searchClient.getFacets('project'))
searchClient.setConstraint(project='CMIP5')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable projects=%s' % search_client.getFacets('project'))
search_client.setConstraint(project='CMIP5')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable models=%s' % search_client.getFacets('model'))
search_client.setConstraint(model='INM-CM4')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable models=%s' % searchClient.getFacets('model'))
searchClient.setConstraint(model='INM-CM4')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable experiments=%s' % search_client.getFacets('experiment'))
search_client.setConstraint(experiment='historical')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable experiments=%s' % searchClient.getFacets('experiment'))
searchClient.setConstraint(experiment='historical')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
search_client.setConstraint(time_frequency='mon')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable time frequencies=%s' %
searchClient.getFacets('time_frequency'))
searchClient.setConstraint(time_frequency='mon')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable CF standard names=%s' % search_client.getFacets('cf_standard_name'))
search_client.setConstraint(cf_standard_name='air_temperature')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable CF standard names=%s' %
searchClient.getFacets('cf_standard_name'))
searchClient.setConstraint(cf_standard_name='air_temperature')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
urls = search_client.getFiles()

urls = searchClient.getFiles()
return urls


def main_obs4mips():
'''
"""
Example workflow to search for obs4MIPs files.
'''
"""

searchClient = SearchClient(distrib=False)
search_client = SearchClient(distrib=False)

# obs4MIPs
print('\nAvailable projects=%s' % searchClient.getFacets('project'))
searchClient.setConstraint(project='obs4MIPs')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable projects=%s' % search_client.getFacets('project'))
search_client.setConstraint(project='obs4MIPs')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable variables=%s' % searchClient.getFacets('variable'))
searchClient.setConstraint(variable='hus')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable variables=%s' % search_client.getFacets('variable'))
search_client.setConstraint(variable='hus')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable time frequencies=%s' %
searchClient.getFacets('time_frequency'))
searchClient.setConstraint(time_frequency='mon')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasets())
print('\nAvailable time frequencies=%s' % search_client.getFacets('time_frequency'))
search_client.setConstraint(time_frequency='mon')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

print('\nAvailable models=%s' % searchClient.getFacets('model'))
searchClient.setConstraint(model='Obs-MLS')
print("Number of Datasets=%d" % searchClient.getNumberOfDatasetsi())
print('\nAvailable models=%s' % search_client.getFacets('model'))
search_client.setConstraint(model='Obs-MLS')
print("Number of Datasets=%d" % search_client.getNumberOfDatasets())

urls = search_client.getFiles()

urls = searchClient.getFiles()
return urls


if __name__ == '__main__':
main()
Loading (the diff for the sixth changed file was not rendered in this capture)

0 comments on commit 48a18fc

Please sign in to comment.