This repository has been archived by the owner on May 12, 2021. It is now read-only.

Merge branch 'master' into CLIMATE-891
MichaelArthurAnderson authored Mar 17, 2018
2 parents 539bb8b + 8e1edc8 commit f8b8c90
Showing 8 changed files with 203 additions and 157 deletions.
62 changes: 34 additions & 28 deletions examples/esgf_integration_example.py
@@ -37,31 +37,37 @@

import ocw.data_source.esgf as esgf

if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
variable = 'zosStderr'

if sys.version_info[0] >= 3:
    username = input('Enter your ESGF OpenID:\n')
else:
    username = raw_input('Enter your ESGF OpenID:\n')

password = getpass(prompt='Enter your ESGF Password:\n')

# Multiple datasets are returned in a list if the ESGF dataset is
# divided into multiple files.
datasets = esgf.load_dataset(dataset_id,
                             variable,
                             username,
                             password)

# For this example, our dataset is only stored in a single file so
# we only need to look at the 0-th value in the returned list.
ds = datasets[0]

print('\n--------\n')
print('Variable: ', ds.variable)
print('Shape: ', ds.values.shape)
print('A Value: ', ds.values[100][100][100])
def main():
    """
    An example of using the OCW ESGF library. Connects to an ESGF
    server and downloads a dataset.
    """
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
    variable = 'zosStderr'

    if sys.version_info[0] >= 3:
        username = input('Enter your ESGF OpenID:\n')
    else:
        username = raw_input('Enter your ESGF OpenID:\n')

    password = getpass(prompt='Enter your ESGF Password:\n')

    # Multiple datasets are returned in a list if the ESGF dataset is
    # divided into multiple files.
    datasets = esgf.load_dataset(dataset_id, variable, username, password)

    # For this example, our dataset is only stored in a single file so
    # we only need to look at the 0-th value in the returned list.
    dataset = datasets[0]

    print('\n--------\n')
    print('Variable: ', dataset.variable)
    print('Shape: ', dataset.values.shape)
    print('A Value: ', dataset.values[100][100][100])


if __name__ == '__main__':
    main()
66 changes: 33 additions & 33 deletions ocw/data_source/esgf.py
@@ -16,9 +16,26 @@
# specific language governing permissions and limitations
# under the License.
#
"""
A set of functions to wrap downloading ESGF datasets into an OCW dataset object.
*** Note *** The ESGF data source requires that the user have certain credentials downloaded from
the ESG. The current version of the module should download these automatically. Older versions of
the library will not download them. The solution is to use the WGET script from the EGS to download
a test dataset to get the credentials. The data source should work as expected then.
"""
import os
import sys

import requests
from bs4 import BeautifulSoup

import ocw.data_source.local as local
from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
from ocw.esgf.download import download
from ocw.esgf.logon import logon

if sys.version_info[0] >= 3:
    from urllib.error import HTTPError
else:
@@ -27,15 +44,6 @@
    # might be around one day
    from urllib2 import HTTPError

from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
from ocw.esgf.download import download
from ocw.esgf.logon import logon
from ocw.esgf.search import SearchClient
import ocw.data_source.local as local

from bs4 import BeautifulSoup
import requests


def load_dataset(dataset_id,
                 variable_name,
@@ -44,9 +52,8 @@ def load_dataset(dataset_id,
                 search_url=DEFAULT_ESGF_SEARCH,
                 elevation_index=0,
                 name='',
                 save_path='/tmp',
                 **additional_constraints):
    ''' Load an ESGF dataset.
                 save_path='/tmp'):
    """ Load an ESGF dataset.
    :param dataset_id: The ESGF ID of the dataset to load.
    :type dataset_id: :mod:`string`
@@ -74,52 +81,45 @@ def load_dataset(dataset_id,
    :param save_path: (Optional) Path to where downloaded files should be saved.
    :type save_path: :mod:`string`
    :param additional_constraints: (Optional) Additional key/value pairs to
        pass as constraints to the search wrapper. These can be anything found
        on the ESGF metadata page for a dataset.
    :returns: A :class:`list` of :class:`dataset.Dataset` containing the
        requested dataset. If the dataset is stored in multiple files each will
        be loaded into a separate :class:`dataset.Dataset`.
    :raises ValueError: If no dataset can be found for the supplied ID and
        variable, or if the requested dataset is a multi-file dataset.
    '''
    download_data = _get_file_download_data(url=search_url,
                                            dataset_id=dataset_id,
                                            variable=variable_name)
    """
    download_data = \
        _get_file_download_data(url=search_url, dataset_id=dataset_id, variable=variable_name)

    datasets = []

    for url, var in download_data:
        _download_files([url],
                        esgf_username,
                        esgf_password,
                        download_directory=save_path)
        _download_files([url], esgf_username, esgf_password, download_directory=save_path)

        file_save_path = os.path.join(save_path, url.split('/')[-1])
        datasets.append(local.load_file(file_save_path,
                                        var,
                                        name=name,

        datasets.append(local.load_file(file_save_path, var, name=name,
                                        elevation_index=elevation_index))

    origin = {
        'source': 'esgf',
        'dataset_id': dataset_id,
        'variable': variable_name
    }
    for ds in datasets:
        ds.origin = origin

    for dataset in datasets:
        dataset.origin = origin

    return datasets


def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
    ''''''
    """Return (download URL, variable name) pairs for the files in an ESGF dataset."""
    url += '?type=File&dataset_id={}&variable={}'
    url = url.format(dataset_id, variable)

    r = requests.get(url)
    xml = BeautifulSoup(r.content, "html.parser")
    raw_data = requests.get(url)
    xml = BeautifulSoup(raw_data.content, "html.parser")

    dont_have_results = not bool(xml.response.result['numfound'])

@@ -141,7 +141,7 @@ def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):


def _download_files(file_urls, username, password, download_directory='/tmp'):
    ''''''
    """Log on to ESGF, then download each file in file_urls to download_directory."""
    try:
        logon(username, password)
    except HTTPError:
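For orientation only (not part of this commit), a minimal sketch of calling the load_dataset entry point shown above. The OpenID and password values are placeholders, the dataset ID mirrors the bundled example, and no separate logon call is needed because load_dataset logs on internally via _download_files.

import ocw.data_source.esgf as esgf

dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
variable = 'zosStderr'

try:
    # Downloads the file(s) behind the dataset and returns a list of OCW
    # Dataset objects, one per downloaded file.
    datasets = esgf.load_dataset(dataset_id, variable,
                                 'https://esgf.example/openid/jdoe',  # placeholder OpenID
                                 'not-a-real-password',               # placeholder password
                                 save_path='/tmp')
except ValueError as err:
    # Raised when the search returns nothing for the supplied ID and variable.
    print('ESGF search failed: %s' % err)
else:
    print('Loaded %d dataset(s), first variable: %s' % (len(datasets), datasets[0].variable))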
43 changes: 32 additions & 11 deletions ocw/dataset_processor.py
@@ -15,20 +15,19 @@
# limitations under the License.
#

from ocw import dataset as ds
import ocw.utils as utils

import datetime
import logging

import netCDF4
import numpy as np
import numpy.ma as ma
from scipy.interpolate import griddata
import scipy.ndimage
from scipy.stats import rankdata
from scipy.ndimage import map_coordinates
import netCDF4
from matplotlib.path import Path
from scipy.interpolate import griddata
from scipy.ndimage import map_coordinates

import logging
import ocw.utils as utils
from ocw import dataset as ds

logger = logging.getLogger(__name__)

@@ -149,7 +148,7 @@ def temporal_rebin_with_time_index(target_dataset, nt_average):
        It is the same as the number of time indices to be averaged.
        (length of time dimension in the rebinned dataset) =
        (original time dimension length / nt_average)
    :type temporal_resolution: integer
    :type nt_average: integer
    :returns: A new temporally rebinned Dataset
    :rtype: :class:`dataset.Dataset`
@@ -505,10 +504,32 @@ def temporal_slice(target_dataset, start_time, end_time):
    :raises: ValueError
    '''

    # https://issues.apache.org/jira/browse/CLIMATE-938
    # netCDF datetimes allow for a variety of calendars while Python has
    # only one. This would throw an error about a calendar mismatch when
    # comparing a Python datetime object to a netCDF datetime object.
    # Cast the date as best we can so the comparison will compare like
    # data types. This will still throw an exception if the start / end date
    # are not valid in the given calendar, February 29th in a DatetimeNoLeap
    # calendar for example.
    slice_start_time = start_time
    slice_end_time = end_time

    if isinstance(target_dataset.times.item(0), netCDF4.netcdftime._netcdftime.datetime):
        slice_start_time =\
            type(target_dataset.times.item(0))(start_time.year, start_time.month, start_time.day,
                                               start_time.hour, start_time.minute, start_time.second)

        slice_end_time =\
            type(target_dataset.times.item(0))(end_time.year, end_time.month, end_time.day,
                                               end_time.hour, end_time.minute, end_time.second)

    start_time_index = np.where(
        target_dataset.times >= start_time)[0][0]
        target_dataset.times >= slice_start_time)[0][0]

    end_time_index = np.where(
        target_dataset.times <= end_time)[0][-1]
        target_dataset.times <= slice_end_time)[0][-1]

    new_times = target_dataset.times[start_time_index:end_time_index + 1]
    new_values = target_dataset.values[start_time_index:end_time_index + 1, :]

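As a hedged illustration of the temporal_slice change above (not part of the commit): callers can keep passing ordinary Python datetimes, and the cast to the dataset's own netCDF4 datetime type happens inside the function. Here target_dataset stands in for any loaded OCW Dataset, and the sketch assumes temporal_slice returns a new Dataset, as the surrounding code suggests.

import datetime

import ocw.dataset_processor as dsp

start = datetime.datetime(2001, 1, 1)
end = datetime.datetime(2005, 12, 31)

# Plain datetimes are fine even for, e.g., a DatetimeNoLeap time axis;
# temporal_slice casts them to the dataset's datetime type before comparing.
sliced = dsp.temporal_slice(target_dataset, start, end)
print(sliced.times[0], sliced.times[-1])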
2 changes: 1 addition & 1 deletion ocw/esgf/constants.py
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
'''Module containing constant parameters for ESGF RCMES integration.'''
"""Module containing constant parameters for ESGF RCMES integration."""

# default location of ESGF user credentials
ESGF_CREDENTIALS = "~/.esg/credentials.pem"
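Tying this constant back to the credentials note in ocw/data_source/esgf.py, a purely illustrative check (not part of the commit) for whether the credential file already exists; per that note, the module should fetch it automatically on first logon if it does not.

import os

from ocw.esgf.constants import ESGF_CREDENTIALS

credentials_path = os.path.expanduser(ESGF_CREDENTIALS)  # ~/.esg/credentials.pem
if not os.path.isfile(credentials_path):
    print('No ESGF credentials at %s yet; they should be fetched on first logon.'
          % credentials_path)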
53 changes: 33 additions & 20 deletions ocw/esgf/download.py
@@ -16,12 +16,18 @@
# specific language governing permissions and limitations
# under the License.
#
'''
"""
OCW module to download a file from ESGF.
'''
"""

from __future__ import print_function

import sys
from os.path import expanduser, join

from ocw.esgf.constants import ESGF_CREDENTIALS

if sys.version_info[0] >= 3:
    from http.client import HTTPSConnection
    from urllib.request import build_opener
@@ -35,50 +41,57 @@
    from urllib2 import build_opener
    from urllib2 import HTTPCookieProcessor
    from urllib2 import HTTPSHandler
from os.path import expanduser, join

from ocw.esgf.constants import ESGF_CREDENTIALS


class HTTPSClientAuthHandler(HTTPSHandler):
    '''
    """
    HTTP handler that transmits an X509 certificate as part of the request
    '''
    """

    def __init__(self, key, cert):
        HTTPSHandler.__init__(self)
        self.key = key
        self.cert = cert

    def https_open(self, req):
        """
        Opens the https connection.
        :param req: The https request object.
        :return: An addinfourl object for the request.
        """
        return self.do_open(self.getConnection, req)

    def getConnection(self, host, timeout=300):
        return HTTPSConnection(host, key_file=self.key, cert_file=self.cert)
        """
        Create an HTTPSConnection object.
        :param host: The ESGF server to connect to.
        :param timeout: Connection timeout in seconds.
        :return: An HTTPSConnection configured with the client certificate.
        """
        return HTTPSConnection(host, key_file=self.key, cert_file=self.cert, timeout=timeout)


def download(url, toDirectory="/tmp"):
    '''
    """
    Function to download a single file from ESGF.
    :param url: the URL of the file to download
    :param toDirectory: target directory where the file will be written
    '''
    """

    # setup HTTP handler
    certFile = expanduser(ESGF_CREDENTIALS)
    opener = build_opener(HTTPSClientAuthHandler(certFile, certFile))
    cert_file = expanduser(ESGF_CREDENTIALS)
    opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
    opener.add_handler(HTTPCookieProcessor())

    # download file
    localFilePath = join(toDirectory, url.split('/')[-1])
    print("\nDownloading url: %s to local path: %s ..." % (url, localFilePath))
    localFile = open(localFilePath, 'w')
    webFile = opener.open(url)
    localFile.write(webFile.read())
    local_file_path = join(toDirectory, url.split('/')[-1])
    print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
    local_file = open(local_file_path, 'w')
    web_file = opener.open(url)
    local_file.write(web_file.read())

    # cleanup
    localFile.close()
    webFile.close()
    local_file.close()
    web_file.close()
    opener.close()
    print("... done")
