This repository has been archived by the owner on May 12, 2021. It is now read-only.

Merge branch 'master' into CLIMATE-891
MichaelArthurAnderson authored Mar 17, 2018
2 parents 539bb8b + 8e1edc8 commit f8b8c90
Showing 8 changed files with 203 additions and 157 deletions.
62 changes: 34 additions & 28 deletions examples/esgf_integration_example.py
@@ -37,31 +37,37 @@

import ocw.data_source.esgf as esgf

if hasattr(ssl, '_create_unverified_context'):
    ssl._create_default_https_context = ssl._create_unverified_context

dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
variable = 'zosStderr'

if sys.version_info[0] >= 3:
    username = input('Enter your ESGF OpenID:\n')
else:
    username = raw_input('Enter your ESGF OpenID:\n')

password = getpass(prompt='Enter your ESGF Password:\n')

# Multiple datasets are returned in a list if the ESGF dataset is
# divided into multiple files.
datasets = esgf.load_dataset(dataset_id,
                             variable,
                             username,
                             password)

# For this example, our dataset is only stored in a single file so
# we only need to look at the 0-th value in the returned list.
ds = datasets[0]

print('\n--------\n')
print('Variable: ', ds.variable)
print('Shape: ', ds.values.shape)
print('A Value: ', ds.values[100][100][100])
def main():
    """
    An example of using the OCW ESGF library. Connects to an ESGF
    server and downloads a dataset.
    """
    if hasattr(ssl, '_create_unverified_context'):
        ssl._create_default_https_context = ssl._create_unverified_context

    dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
    variable = 'zosStderr'

    if sys.version_info[0] >= 3:
        username = input('Enter your ESGF OpenID:\n')
    else:
        username = raw_input('Enter your ESGF OpenID:\n')

    password = getpass(prompt='Enter your ESGF Password:\n')

    # Multiple datasets are returned in a list if the ESGF dataset is
    # divided into multiple files.
    datasets = esgf.load_dataset(dataset_id, variable, username, password)

    # For this example, our dataset is only stored in a single file so
    # we only need to look at the 0-th value in the returned list.
    dataset = datasets[0]

    print('\n--------\n')
    print('Variable: ', dataset.variable)
    print('Shape: ', dataset.values.shape)
    print('A Value: ', dataset.values[100][100][100])


if __name__ == '__main__':
    main()
66 changes: 33 additions & 33 deletions ocw/data_source/esgf.py
@@ -16,9 +16,26 @@
# specific language governing permissions and limitations
# under the License.
#
"""
A set of functions to wrap downloading ESGF datasets into an OCW dataset object.
*** Note *** The ESGF data source requires that the user have certain credentials downloaded from
the ESG. The current version of the module should download these automatically. Older versions of
the library will not download them. The solution is to use the WGET script from the EGS to download
a test dataset to get the credentials. The data source should work as expected then.
"""
import os
import sys

import requests
from bs4 import BeautifulSoup

import ocw.data_source.local as local
from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
from ocw.esgf.download import download
from ocw.esgf.logon import logon

if sys.version_info[0] >= 3:
    from urllib.error import HTTPError
else:
@@ -27,15 +44,6 @@
    # might be around one day
    from urllib2 import HTTPError

from ocw.esgf.constants import DEFAULT_ESGF_SEARCH
from ocw.esgf.download import download
from ocw.esgf.logon import logon
from ocw.esgf.search import SearchClient
import ocw.data_source.local as local

from bs4 import BeautifulSoup
import requests


def load_dataset(dataset_id,
                 variable_name,
@@ -44,9 +52,8 @@ def load_dataset(dataset_id,
                 search_url=DEFAULT_ESGF_SEARCH,
                 elevation_index=0,
                 name='',
                 save_path='/tmp',
                 **additional_constraints):
    ''' Load an ESGF dataset.
                 save_path='/tmp'):
    """ Load an ESGF dataset.
    :param dataset_id: The ESGF ID of the dataset to load.
    :type dataset_id: :mod:`string`
@@ -74,52 +81,45 @@ def load_dataset(dataset_id,
    :param save_path: (Optional) Path to where downloaded files should be saved.
    :type save_path: :mod:`string`
    :param additional_constraints: (Optional) Additional key/value pairs to
        pass as constraints to the search wrapper. These can be anything found
        on the ESGF metadata page for a dataset.
    :returns: A :class:`list` of :class:`dataset.Dataset` containing the
        requested dataset. If the dataset is stored in multiple files each will
        be loaded into a separate :class:`dataset.Dataset`.
    :raises ValueError: If no dataset can be found for the supplied ID and
        variable, or if the requested dataset is a multi-file dataset.
    '''
    download_data = _get_file_download_data(url=search_url,
                                            dataset_id=dataset_id,
                                            variable=variable_name)
    """
    download_data = \
        _get_file_download_data(url=search_url, dataset_id=dataset_id, variable=variable_name)

    datasets = []

    for url, var in download_data:
        _download_files([url],
                        esgf_username,
                        esgf_password,
                        download_directory=save_path)
        _download_files([url], esgf_username, esgf_password, download_directory=save_path)

        file_save_path = os.path.join(save_path, url.split('/')[-1])
        datasets.append(local.load_file(file_save_path,
                                        var,
                                        name=name,

        datasets.append(local.load_file(file_save_path, var, name=name,
                                        elevation_index=elevation_index))

    origin = {
        'source': 'esgf',
        'dataset_id': dataset_id,
        'variable': variable_name
    }
    for ds in datasets:
        ds.origin = origin

    for dataset in datasets:
        dataset.origin = origin

    return datasets


def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):
    ''''''
    """Return (download URL, variable name) pairs for the files in an ESGF dataset."""
    url += '?type=File&dataset_id={}&variable={}'
    url = url.format(dataset_id, variable)

    r = requests.get(url)
    xml = BeautifulSoup(r.content, "html.parser")
    raw_data = requests.get(url)
    xml = BeautifulSoup(raw_data.content, "html.parser")

    dont_have_results = not bool(xml.response.result['numfound'])

@@ -141,7 +141,7 @@ def _get_file_download_data(dataset_id, variable, url=DEFAULT_ESGF_SEARCH):


def _download_files(file_urls, username, password, download_directory='/tmp'):
    ''''''
    """Log on to ESGF, then download each file in file_urls to download_directory."""
    try:
        logon(username, password)
    except HTTPError:
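For orientation only (not part of this commit), a minimal sketch of calling the load_dataset entry point shown above. The OpenID and password values are placeholders, the dataset ID mirrors the bundled example, and no separate logon call is needed because load_dataset logs on internally via _download_files.

import ocw.data_source.esgf as esgf

dataset_id = 'obs4mips.CNES.AVISO.zos.mon.v20110829|esgf-data.jpl.nasa.gov'
variable = 'zosStderr'

try:
    # Downloads the file(s) behind the dataset and returns a list of OCW
    # Dataset objects, one per downloaded file.
    datasets = esgf.load_dataset(dataset_id, variable,
                                 'https://esgf.example/openid/jdoe',  # placeholder OpenID
                                 'not-a-real-password',               # placeholder password
                                 save_path='/tmp')
except ValueError as err:
    # Raised when the search returns nothing for the supplied ID and variable.
    print('ESGF search failed: %s' % err)
else:
    print('Loaded %d dataset(s), first variable: %s' % (len(datasets), datasets[0].variable))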
43 changes: 32 additions & 11 deletions ocw/dataset_processor.py
@@ -15,20 +15,19 @@
# limitations under the License.
#

from ocw import dataset as ds
import ocw.utils as utils

import datetime
import logging

import netCDF4
import numpy as np
import numpy.ma as ma
from scipy.interpolate import griddata
import scipy.ndimage
from scipy.stats import rankdata
from scipy.ndimage import map_coordinates
import netCDF4
from matplotlib.path import Path
from scipy.interpolate import griddata
from scipy.ndimage import map_coordinates

import logging
import ocw.utils as utils
from ocw import dataset as ds

logger = logging.getLogger(__name__)

@@ -149,7 +148,7 @@ def temporal_rebin_with_time_index(target_dataset, nt_average):
        It is the same as the number of time indices to be averaged.
        (length of time dimension in the rebinned dataset) =
        (original time dimension length / nt_average)
    :type temporal_resolution: integer
    :type nt_average: integer
    :returns: A new temporally rebinned Dataset
    :rtype: :class:`dataset.Dataset`
@@ -505,10 +504,32 @@ def temporal_slice(target_dataset, start_time, end_time):
    :raises: ValueError
    '''

    # https://issues.apache.org/jira/browse/CLIMATE-938
    # netCDF datetimes allow for a variety of calendars while Python has
    # only one. This would throw an error about a calendar mismatch when
    # comparing a Python datetime object to a netCDF datetime object.
    # Cast the date as best we can so the comparison will compare like
    # data types. This will still throw an exception if the start / end date
    # are not valid in the given calendar, February 29th in a DatetimeNoLeap
    # calendar for example.
    slice_start_time = start_time
    slice_end_time = end_time

    if isinstance(target_dataset.times.item(0), netCDF4.netcdftime._netcdftime.datetime):
        slice_start_time =\
            type(target_dataset.times.item(0))(start_time.year, start_time.month, start_time.day,
                                               start_time.hour, start_time.minute, start_time.second)

        slice_end_time =\
            type(target_dataset.times.item(0))(end_time.year, end_time.month, end_time.day,
                                               end_time.hour, end_time.minute, end_time.second)

    start_time_index = np.where(
        target_dataset.times >= start_time)[0][0]
        target_dataset.times >= slice_start_time)[0][0]

    end_time_index = np.where(
        target_dataset.times <= end_time)[0][-1]
        target_dataset.times <= slice_end_time)[0][-1]

    new_times = target_dataset.times[start_time_index:end_time_index + 1]
    new_values = target_dataset.values[start_time_index:end_time_index + 1, :]

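As a hedged illustration of the temporal_slice change above (not part of the commit): callers can keep passing ordinary Python datetimes, and the cast to the dataset's own netCDF4 datetime type happens inside the function. Here target_dataset stands in for any loaded OCW Dataset, and the sketch assumes temporal_slice returns a new Dataset, as the surrounding code suggests.

import datetime

import ocw.dataset_processor as dsp

start = datetime.datetime(2001, 1, 1)
end = datetime.datetime(2005, 12, 31)

# Plain datetimes are fine even for, e.g., a DatetimeNoLeap time axis;
# temporal_slice casts them to the dataset's datetime type before comparing.
sliced = dsp.temporal_slice(target_dataset, start, end)
print(sliced.times[0], sliced.times[-1])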
2 changes: 1 addition & 1 deletion ocw/esgf/constants.py
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
'''Module containing constant parameters for ESGF RCMES integration.'''
"""Module containing constant parameters for ESGF RCMES integration."""

# default location of ESGF user credentials
ESGF_CREDENTIALS = "~/.esg/credentials.pem"
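Tying this constant back to the credentials note in ocw/data_source/esgf.py, a purely illustrative check (not part of the commit) for whether the credential file already exists; per that note, the module should fetch it automatically on first logon if it does not.

import os

from ocw.esgf.constants import ESGF_CREDENTIALS

credentials_path = os.path.expanduser(ESGF_CREDENTIALS)  # ~/.esg/credentials.pem
if not os.path.isfile(credentials_path):
    print('No ESGF credentials at %s yet; they should be fetched on first logon.'
          % credentials_path)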
53 changes: 33 additions & 20 deletions ocw/esgf/download.py
@@ -16,12 +16,18 @@
# specific language governing permissions and limitations
# under the License.
#
'''
"""
OCW module to download a file from ESGF.
'''
"""

from __future__ import print_function

import sys
from os.path import expanduser, join

from ocw.esgf.constants import ESGF_CREDENTIALS

if sys.version_info[0] >= 3:
    from http.client import HTTPSConnection
    from urllib.request import build_opener
@@ -35,50 +41,57 @@
    from urllib2 import build_opener
    from urllib2 import HTTPCookieProcessor
    from urllib2 import HTTPSHandler
from os.path import expanduser, join

from ocw.esgf.constants import ESGF_CREDENTIALS


class HTTPSClientAuthHandler(HTTPSHandler):
    '''
    """
    HTTP handler that transmits an X509 certificate as part of the request
    '''
    """

    def __init__(self, key, cert):
        HTTPSHandler.__init__(self)
        self.key = key
        self.cert = cert

    def https_open(self, req):
        """
        Opens the https connection.
        :param req: The https request object.
        :return: An addinfourl object for the request.
        """
        return self.do_open(self.getConnection, req)

    def getConnection(self, host, timeout=300):
        return HTTPSConnection(host, key_file=self.key, cert_file=self.cert)
        """
        Create an HTTPSConnection object.
        :param host: The ESGF server to connect to.
        :param timeout: Connection timeout in seconds.
        :return: An HTTPSConnection configured with the client certificate.
        """
        return HTTPSConnection(host, key_file=self.key, cert_file=self.cert, timeout=timeout)


def download(url, toDirectory="/tmp"):
    '''
    """
    Function to download a single file from ESGF.
    :param url: the URL of the file to download
    :param toDirectory: target directory where the file will be written
    '''
    """

    # setup HTTP handler
    certFile = expanduser(ESGF_CREDENTIALS)
    opener = build_opener(HTTPSClientAuthHandler(certFile, certFile))
    cert_file = expanduser(ESGF_CREDENTIALS)
    opener = build_opener(HTTPSClientAuthHandler(cert_file, cert_file))
    opener.add_handler(HTTPCookieProcessor())

    # download file
    localFilePath = join(toDirectory, url.split('/')[-1])
    print("\nDownloading url: %s to local path: %s ..." % (url, localFilePath))
    localFile = open(localFilePath, 'w')
    webFile = opener.open(url)
    localFile.write(webFile.read())
    local_file_path = join(toDirectory, url.split('/')[-1])
    print("\nDownloading url: %s to local path: %s ..." % (url, local_file_path))
    local_file = open(local_file_path, 'w')
    web_file = opener.open(url)
    local_file.write(web_file.read())

    # cleanup
    localFile.close()
    webFile.close()
    local_file.close()
    web_file.close()
    opener.close()
    print("... done")
