From 8217d12f06987d852f9294da94a5af243116e751 Mon Sep 17 00:00:00 2001
From: Alex Goodman
Date: Mon, 25 Sep 2017 10:35:20 -0700
Subject: [PATCH 01/16] CLIMATE-926 - Metadata Extractors

---
 RCMES/CORDEX/metadata_extractor.py | 222 +++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)
 create mode 100644 RCMES/CORDEX/metadata_extractor.py

diff --git a/RCMES/CORDEX/metadata_extractor.py b/RCMES/CORDEX/metadata_extractor.py
new file mode 100644
index 00000000..7351cf4f
--- /dev/null
+++ b/RCMES/CORDEX/metadata_extractor.py
@@ -0,0 +1,222 @@
+import glob
+import os
+
+
+class MetadataExtractor(object):
+    def __init__(self, *paths):
+        """Extracts metadata from data filenames.
+
+        Instances of MetadataExtractor are used to extract metadata from
+        filenames in bulk. Example usage:
+        >>> extractor = MetadataExtractor('/path/to/data')
+
+        Suppose the data in this directory had the following files:
+        pr_*.nc, uas_*.nc, vas_*.nc
+
+        All of the metadata lies in the data attribute:
+        >>> extractor.data
+        [{'filename': '/path/to/data/pr_*.nc', 'variable': 'pr'},
+         {'filename': '/path/to/data/vas_*.nc', 'variable': 'vas'},
+         {'filename': '/path/to/data/uas_*.nc', 'variable': 'uas'}]
+
+        Results can be narrowed down by specifying values for a field:
+        >>> extractor.query(variable='pr')
+        [{'filename': '/path/to/data/pr_*.nc', 'variable': 'pr'}]
+
+        Finally, metadata from two sets of extractors can be grouped together
+        based on a common field name as follows:
+        >>> extractor.group(extractor2, 'variable')
+
+        This class should only be used as a starting point. We recommend using
+        the included obs4MIPSMetadataExtractor and CORDEXMetadataExtractor
+        subclasses or creating your own subclass for your use case.
+        """
+        self.paths = paths
+
+    @property
+    def data(self):
+        """
+        The extracted metadata for each file, with all fields listed in
+        the fields attribute included.
+        """
+        return self._data
+
+    @property
+    def paths(self):
+        """
+        Search paths containing the dataset files.
+        """
+        return self._paths
+
+    @paths.setter
+    def paths(self, paths):
+        """
+        Extracts the metadata from scratch when paths are reset.
+        """
+        self._paths = paths
+        self._extract()
+
+    @property
+    def fields(self):
+        """
+        The names of the fields in the filename, assuming the fully filtered
+        filename conforms to the following convention:
+        filename = <field1>_<field2>_..._<time_period>.nc. Using fewer fields
+        than the filename defines is allowed.
+        """
+        fields = ['variable']
+        return fields
+
+    @property
+    def files(self):
+        """
+        List of files (or regular expressions) for each dataset.
+        """
+        files = []
+        for path in self.paths:
+            files.extend(glob.glob(os.path.join(path, '*.nc')))
+        return list(set(self.get_pattern(fname) for fname in files))
+
+    @property
+    def variables(self):
+        """
+        Get the list of variables included across all the datasets.
+        """
+        return self.get_field('variable')
+
+    def query(self, **kwargs):
+        """
+        Narrow down the list of files by field names.
+        """
+        fields = kwargs.keys()
+        if not set(fields).issubset(set(self.fields)):
+            raise ValueError("Invalid fields: {}. Must be subset of: {}"
+                             .format(fields, self.fields))
+        data = self.data
+        for field, value in kwargs.items():
+            value = value if isinstance(value, list) else [value]
+            data = [meta for meta in data if meta[field] in value]
+        return data
+
+    def group(self, extractor, field):
+        """
+        Compare the data of this extractor with another extractor instance
+        and group each of their metadata together by the given field.
+        """
+        # First we only want to consider values of field which are contained
+        # in both extractors
+        subset = self.get_field(field)
+        other_subset = extractor.get_field(field)
+        intersection = list(subset.intersection(other_subset))
+
+        # Next we will group the datasets in each extractor together by common
+        # field values
+        kwargs = {field: intersection}
+        results = self.query(**kwargs)
+
+        groups = []
+        for meta in results:
+            val = meta[field]
+            kwargs.update({field: val})
+            match = extractor.query(**kwargs)
+            groups.append((meta, match))
+
+        return groups
+
+    def get_field(self, field):
+        """
+        Returns only the selected field of the extracted data.
+        """
+        if field not in self.fields:
+            raise ValueError("Invalid field: {}. Must be one of: {}"
+                             .format(field, self.fields))
+        sub = set(meta[field] for meta in self.data)
+        return sub
+
+    def filter_filename(self, fname):
+        """
+        Applies a filter to each individual filename given by the files
+        property, which is useful for "fixing" files within a dataset that
+        are known to not follow conventions so that they do.
+        """
+        return os.path.basename(fname)
+
+    def get_pattern(self, fname):
+        """
+        Used to group multiple file datasets together via regular expressions.
+        The most common convention is to split files by time periods, which
+        are generally the last field in a filename.
+        """
+        base = fname.split('_')
+        pattern = '_'.join(base[:len(self.fields)] + ['*.nc'])
+        return pattern
+
+    def _extract(self):
+        """
+        Do the actual metadata extraction from the list of filename given
+        by the files property. Additionally, filenames can also be filtered
+        via filter_filename() to remove unwanted characters from the extraction.
+        """
+        self._data = []
+        for fname in self.files:
+            meta = dict(filename=fname)
+
+            # Perform the actual metadata extraction
+            fname = self.filter_filename(fname)
+            meta.update(dict(zip(self.fields, fname.split('_')[:-1])))
+            self._data.append(meta)
+
+
+class obs4MIPSMetadataExtractor(MetadataExtractor):
+    @property
+    def instruments(self):
+        """
+        Get the list of instruments across all the datasets.
+        """
+        return self.get_field('instrument')
+
+    @property
+    def fields(self):
+        """
+        obs4MIPs fields
+        """
+        fields = ['variable', 'instrument', 'processing_level', 'version']
+        return fields
+
+    def filter_filename(self, fname):
+        """
+        CALIPSO files have odd naming conventions, so we will use
+        a modified version to conform to standard obs4MIPs conventions.
+        """
+        fname = os.path.basename(fname)
+        fname = fname.replace('_obs4MIPs_', '_')
+        fname = fname.replace('calipso', '')
+        fname = fname.replace('Lidarsr532', '')
+        return fname
+
+    def get_pattern(self, fname):
+        """
+        Overridden to deal with CALIPSO filenames
+        """
+        base = fname.split('_')
+        offset = -2 if len(base) != 5 else -1
+        pattern = '_'.join(base[:offset] + ['*.nc'])
+        return pattern
+
+
+class CORDEXMetadataExtractor(MetadataExtractor):
+    @property
+    def models(self):
+        """
+        Get the list of models across all the datasets.
+        """
+        return self.get_field('models')
+
+    @property
+    def fields(self):
+        """
+        CORDEX fields
+        """
+        fields = ['variable', 'domain', 'driving_model', 'experiment',
+                  'ensemble', 'model', 'version', 'time_step']
+        return fields

From ec81037a21dbf2cf1999422127bbe924e1072c4a Mon Sep 17 00:00:00 2001
From: Alex Goodman
Date: Mon, 25 Sep 2017 10:41:07 -0700
Subject: [PATCH 02/16] Fix model attribute in CORDEXMetadataExtractor

---
 RCMES/CORDEX/metadata_extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RCMES/CORDEX/metadata_extractor.py b/RCMES/CORDEX/metadata_extractor.py
index 7351cf4f..cb18946b 100644
--- a/RCMES/CORDEX/metadata_extractor.py
+++ b/RCMES/CORDEX/metadata_extractor.py
@@ -210,7 +210,7 @@ def models(self):
         """
         Get the list of models across all the datasets.
         """
-        return self.get_field('models')
+        return self.get_field('model')
 
     @property
     def fields(self):

From ba93542a75172b98f556747e520a0239ca8e6012 Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 26 Sep 2017 13:38:30 -0700
Subject: [PATCH 03/16] CLIMATE-919 - load_dataset_from_multiple_netcdf_files()
 does not have default variable units

---
 ocw/data_source/local.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index be23bb26..734b0918 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -170,7 +170,7 @@ def load_WRF_2d_files(file_path=None,
             values0 = file_object.variables[variable_name][:]
             if isinstance(values0, numpy.ndarray):
                 values0 = ma.array(values0,
-                                  mask=numpy.zeros(values0.shape))
+                                   mask=numpy.zeros(values0.shape))
             if ifile == 0:
                 values = values0
                 variable_unit = file_object.variables[variable_name].units
@@ -260,7 +260,11 @@ def load_file(file_path,
         times = utils.decode_time_values(netcdf, time_name)
         times = numpy.array(times)
     values = ma.array(netcdf.variables[variable_name][:])
-    variable_unit = netcdf.variables[variable_name].units
+    if not variable_unit:
+        if hasattr(netcdf.variables[variable_name], 'units'):
+            variable_unit = netcdf.variables[variable_name].units
+        else:
+            variable_unit = '1'
 
     # If the values are 4D then we need to strip out the elevation index
     if len(values.shape) == 4:
@@ -314,7 +318,7 @@ def load_multiple_files(file_path,
     :type file_path: :mod:`string`
     :param dataset_name: a list of dataset names. Data filenames must include the elements of dataset_name list.
     :type dataset_name: :mod:'list'
     :param generic_dataset_name: If false, data filenames must include the elements of dataset_name list.
     :type generic_dataset_name: :mod:'bool'
     :param variable_name: The variable name to load from the NetCDF file.
:type variable_name: :mod:`string` @@ -356,11 +360,11 @@ def load_multiple_files(file_path, lat_name=lat_name, lon_name=lon_name, time_name=time_name)) else: - data_name = [i for i in dataset_name] + data_name = [i for i in dataset_name] pattern = [['*'+i+'*'] for i in dataset_name] if file_path[-1] != '/': file_path = file_path+'/' - + ndata = len(dataset_name) for idata in numpy.arange(ndata): datasets.append(load_dataset_from_multiple_netcdf_files(variable_name, @@ -536,7 +540,8 @@ def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, else: data_values = numpy.concatenate((data_values, values0)) times = numpy.array(times) - return Dataset(lats, lons, times, data_values, + variable_unit = dataset0.units if not variable_unit else variable_unit + return Dataset(lats, lons, times, data_values, variable_name, units=variable_unit,name=name) @@ -674,7 +679,7 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, filename_pattern=None, filelist=None, variable_name='precipitationCal', - user_mask_file=None, + user_mask_file=None, mask_variable_name='mask', user_mask_values=[10], longitude_name='lon', @@ -694,11 +699,11 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, :type name: :mod:`string` :user_mask_file: user's own gridded mask file(a netCDF file name) :type name: :mod:`string` - :mask_variable_name: mask variables in user_mask_file + :mask_variable_name: mask variables in user_mask_file :type name: :mod:`string` - :longitude_name: longitude variable in user_mask_file + :longitude_name: longitude variable in user_mask_file :type name: :mod:`string` - :latitude_name: latitude variable in user_mask_file + :latitude_name: latitude variable in user_mask_file :type name: :mod:`string` :param user_mask_values: grid points where mask_variable == user_mask_value will be extracted. :type user_mask_values: list of strings @@ -731,7 +736,7 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, mask_longitude = file_object.variables[longitude_name][:] mask_latitude = file_object.variables[latitude_name][:] spatial_mask = utils.regrid_spatial_mask(lons,lats, - mask_longitude, mask_latitude, + mask_longitude, mask_latitude, mask_variable, user_mask_values) y_index, x_index = numpy.where(spatial_mask == 0) @@ -739,7 +744,7 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, file_object = h5py.File(file) values0 = ma.transpose(ma.masked_less( file_object['Grid'][variable_name][:], 0.)) - values_masked = values0[y_index, x_index] + values_masked = values0[y_index, x_index] values_masked = ma.expand_dims(values_masked, axis=0) if ifile == 0: values = values_masked @@ -747,5 +752,3 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, values = ma.concatenate((values, values_masked)) file_object.close() return values - - From d868be10eb3862798f456e45e5ae2d3687dd401c Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:03:09 -0700 Subject: [PATCH 04/16] Allow acronyms for CORDEX domains --- ocw/utils.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ocw/utils.py b/ocw/utils.py index db4b9268..0a798b08 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -505,33 +505,33 @@ def CORDEX_boundary(domain_name): :param domain_name: CORDEX domain name (http://www.cordex.org/) :type domain_name: :mod:'string' ''' - if domain_name == 'southamerica': + if domain_name == 'southamerica' or domain_name == 'SAM': return -57.61, 18.50, 254.28 - 360., 343.02 - 360. 
- elif domain_name == 'centralamerica': + elif domain_name == 'centralamerica' or domain_name == 'CAM': return -19.46, 34.83, 235.74 - 360., 337.78 - 360. - elif domain_name == 'northamerica': + elif domain_name == 'northamerica' or domain_name == 'NAM': return 12.55, 75.88, 189.26 - 360., 336.74 - 360. - elif domain_name == 'europe': + elif domain_name == 'europe' or domain_name == 'EUR': return 22.20, 71.84, 338.23 - 360., 64.4 - elif domain_name == 'africa': + elif domain_name == 'africa' or domain_name == 'AFR': return -45.76, 42.24, 335.36 - 360., 60.28 - elif domain_name == 'southasia': + elif domain_name == 'southasia' or domain_name == 'WAS': return -15.23, 45.07, 19.88, 115.55 - elif domain_name == 'eastasia': + elif domain_name == 'eastasia' or domain_name == 'EAS': return -0.10, 61.90, 51.59, 179.99 - elif domain_name == 'centralasia': + elif domain_name == 'centralasia' or domain_name == 'CAS': return 18.34, 69.37, 11.05, 139.13 - elif domain_name == 'australasia': + elif domain_name == 'australasia' or domain_name == 'AUS': return -52.36, 12.21, 89.25, 179.99 - elif domain_name == 'antartica': + elif domain_name == 'antartica' or domain_name == 'ANT': return -89.48, -56.00, -179.00, 179.00 - elif domain_name == 'artic': + elif domain_name == 'artic' or domain_name == 'ARC': return 46.06, 89.50, -179.00, 179.00 - elif domain_name == 'mediterranean': + elif domain_name == 'mediterranean' or domain_name == 'MED': return 25.63, 56.66, 339.79 - 360.00, 50.85 - elif domain_name == 'middleeastnorthafrica': + elif domain_name == 'middleeastnorthafrica' or domain_name == 'MNA': return -7.00, 45.00, 333.00 - 360.00, 76.00 - elif domain_name == 'southeastasia': + elif domain_name == 'southeastasia' or domain_name == 'SEA': return -15.14, 27.26, 89.26, 146.96 else: err = "Invalid CORDEX domain name" @@ -627,7 +627,7 @@ def _force_unicode(s, encoding='utf-8'): ''' if hasattr(s, 'decode'): s = s.decode(encoding=encoding) - + return s def calculate_temporal_trends(dataset): @@ -649,7 +649,7 @@ def calculate_temporal_trends(dataset): if dataset.values[:,iy,ix].count() == nt: trend[iy,ix], slope_err[iy,ix] = calculate_temporal_trend_of_time_series( x, dataset.values[:,iy,ix]) - + return ma.masked_equal(trend, -999.), ma.masked_equal(slope_err, -999.) 
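A quick sketch of the acronym lookup added above (a minimal usage example; at this stage of the series the acronyms are matched in uppercase, exactly as written in the branches, and PATCH 11 later switches these comparisons to lowercase):

    from ocw.utils import CORDEX_boundary

    # Full domain names and their acronyms now resolve to the same boundary,
    # returned as (min_lat, max_lat, min_lon, max_lon).
    assert CORDEX_boundary('northamerica') == CORDEX_boundary('NAM')
    min_lat, max_lat, min_lon, max_lon = CORDEX_boundary('EUR')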
def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000): @@ -663,20 +663,20 @@ def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000) :returns: temporal trend and estimated error from bootstrapping :rtype: :float:`float','float' ''' - + nmodels, nt = timeseries_array.shape x = np.arange(nt) sampled_trend = np.zeros(number_of_samples) ensemble_trend, _ = calculate_temporal_trend_of_time_series( x, np.mean(timeseries_array, axis=0)) - + for isample in np.arange(number_of_samples): index = np.random.choice(nmodels, size=nmodels, replace=True) random_ensemble = np.mean(timeseries_array[index, :], axis=0) sampled_trend[isample], _ = calculate_temporal_trend_of_time_series( x, random_ensemble) return ensemble_trend, np.std(sampled_trend, ddof=1) - + def calculate_temporal_trend_of_time_series(x,y): ''' Calculate least-square trends (a) in y = ax+b and a's standard error From 3e66d94fce1935aac2a8cc88025d50858d78f3ba Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:10:44 -0700 Subject: [PATCH 05/16] Default vales for subplot grid in RCMES script --- RCMES/metrics_and_plots.py | 65 ++++++++++++++++++-------------------- RCMES/run_RCMES.py | 8 ++--- 2 files changed, 34 insertions(+), 39 deletions(-) diff --git a/RCMES/metrics_and_plots.py b/RCMES/metrics_and_plots.py index 848af9de..78c2f14f 100644 --- a/RCMES/metrics_and_plots.py +++ b/RCMES/metrics_and_plots.py @@ -27,7 +27,7 @@ import numpy as np import numpy.ma as ma import matplotlib.pyplot as plt -from mpl_toolkits.basemap import Basemap +from mpl_toolkits.basemap import Basemap from matplotlib import rcParams from matplotlib.patches import Polygon import string @@ -46,7 +46,7 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets model_datasets, # list of target datasets for the evaluation [map_of_bias, map_of_bias]) # run the evaluation (bias calculation) - bias_evaluation.run() + bias_evaluation.run() rcm_bias = bias_evaluation.results[0] @@ -57,7 +57,9 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets lon_min = obs_dataset.lons.min() lon_max = obs_dataset.lons.max() - string_list = list(string.ascii_lowercase) + string_list = list(string.ascii_lowercase) + nmodels = len(model_datasets) + row, column = plotter._best_grid_shape((row, column), nmodels + 1) ax = fig.add_subplot(row,column,1) if map_projection == 'npstere': m = Basemap(ax=ax, projection ='npstere', boundinglat=lat_min, lon_0=0, @@ -79,9 +81,9 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets max = m.contourf(x,y,obs_clim,levels = plotter._nice_intervals(obs_dataset.values, 10), extend='both',cmap='rainbow') ax.annotate('(a) \n' + obs_name,xy=(lon_min, lat_min)) cax = fig.add_axes([0.02, 1.-float(1./row)+1./row*0.25, 0.01, 1./row*0.5]) - plt.colorbar(max, cax = cax) + plt.colorbar(max, cax = cax) clevs = plotter._nice_intervals(rcm_bias, 11) - for imodel in np.arange(len(model_datasets)): + for imodel in np.arange(nmodels): ax = fig.add_subplot(row, column,2+imodel) if map_projection == 'npstere': @@ -97,7 +99,7 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets ax.annotate('('+string_list[imodel+1]+') \n '+model_names[imodel],xy=(lon_min, lat_min)) cax = fig.add_axes([0.91, 0.5, 0.015, 0.4]) - plt.colorbar(max, cax = cax) + plt.colorbar(max, cax = cax) plt.subplots_adjust(hspace=0.01,wspace=0.05) @@ -122,16 +124,16 @@ def 
Taylor_diagram_spatial_pattern_of_multiyear_climatology(obs_dataset, obs_nam [taylor_diagram]) # run the evaluation (bias calculation) - taylor_evaluation.run() + taylor_evaluation.run() taylor_data = taylor_evaluation.results[0] plotter.draw_taylor_diagram(taylor_data, model_names, obs_name, file_name, pos='upper right',frameon=False) -def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, model_names, seasonal_cycle, +def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, model_names, seasonal_cycle, file_name, row, column, x_tick=['']): - nmodel, nt, nregion = model_subregion_mean.shape + nmodel, nt, nregion = model_subregion_mean.shape if seasonal_cycle: obs_data = ma.mean(obs_subregion_mean.reshape([1,nt/12,12,nregion]), axis=1) @@ -140,19 +142,19 @@ def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, mo else: obs_data = obs_subregion_mean model_data = model_subregion_mean - + x_axis = np.arange(nt) x_tick_values = x_axis fig = plt.figure() rcParams['xtick.labelsize'] = 6 rcParams['ytick.labelsize'] = 6 - + for iregion in np.arange(nregion): - ax = fig.add_subplot(row, column, iregion+1) + ax = fig.add_subplot(row, column, iregion+1) x_tick_labels = [''] if iregion+1 > column*(row-1): - x_tick_labels = x_tick + x_tick_labels = x_tick else: x_tick_labels=[''] ax.plot(x_axis, obs_data[0, :, iregion], color='r', lw=2, label=obs_name) @@ -162,8 +164,8 @@ def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, mo ax.set_xticks(x_tick_values) ax.set_xticklabels(x_tick_labels) ax.set_title('Region %02d' % (iregion+1), fontsize=8) - - ax.legend(bbox_to_anchor=(-0.2, row/2), loc='center' , prop={'size':7}, frameon=False) + + ax.legend(bbox_to_anchor=(-0.2, row/2), loc='center' , prop={'size':7}, frameon=False) fig.subplots_adjust(hspace=0.7, wspace=0.5) fig.savefig(file_name, dpi=600, bbox_inches='tight') @@ -172,7 +174,7 @@ def Portrait_diagram_subregion(obs_subregion_mean, obs_name, model_subregion_mea file_name, normalize=True): nmodel, nt, nregion = model_subregion_mean.shape - + if seasonal_cycle: obs_data = ma.mean(obs_subregion_mean.reshape([1,nt/12,12,nregion]), axis=1) model_data = ma.mean(model_subregion_mean.reshape([nmodel,nt/12,12,nregion]), axis=1) @@ -193,35 +195,35 @@ def Portrait_diagram_subregion(obs_subregion_mean, obs_name, model_subregion_mea subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(model_data[imodel, :, iregion], obs_data[0, :, iregion]) # Fourth metric: correlation subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(model_data[imodel, :, iregion], obs_data[0, :, iregion]) - + if normalize: for iregion in np.arange(nregion): - subregion_metrics[0, iregion, : ] = subregion_metrics[0, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. - subregion_metrics[1, iregion, : ] = subregion_metrics[1, iregion, : ]*100. - subregion_metrics[2, iregion, : ] = subregion_metrics[2, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. + subregion_metrics[0, iregion, : ] = subregion_metrics[0, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. + subregion_metrics[1, iregion, : ] = subregion_metrics[1, iregion, : ]*100. + subregion_metrics[2, iregion, : ] = subregion_metrics[2, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. 
region_names = ['R%02d' % i for i in np.arange(nregion)+1] for imetric, metric in enumerate(['bias','std','RMSE','corr']): - plotter.draw_portrait_diagram(subregion_metrics[imetric, :, :], region_names, model_names, file_name+'_'+metric, - xlabel='model',ylabel='region') + plotter.draw_portrait_diagram(subregion_metrics[imetric, :, :], region_names, model_names, file_name+'_'+metric, + xlabel='model',ylabel='region') def Map_plot_subregion(subregions, ref_dataset, directory): - - lons, lats = np.meshgrid(ref_dataset.lons, ref_dataset.lats) + + lons, lats = np.meshgrid(ref_dataset.lons, ref_dataset.lats) fig = plt.figure() ax = fig.add_subplot(111) m = Basemap(ax=ax, projection='cyl',llcrnrlat = lats.min(), urcrnrlat = lats.max(), llcrnrlon = lons.min(), urcrnrlon = lons.max(), resolution = 'l') m.drawcoastlines(linewidth=0.75) m.drawcountries(linewidth=0.75) - m.etopo() - x, y = m(lons, lats) + m.etopo() + x, y = m(lons, lats) #subregion_array = ma.masked_equal(subregion_array, 0) #max=m.contourf(x, y, subregion_array, alpha=0.7, cmap='Accent') for subregion in subregions: - draw_screen_poly(subregion[1], m, 'w') - plt.annotate(subregion[0],xy=(0.5*(subregion[1][2]+subregion[1][3]), 0.5*(subregion[1][0]+subregion[1][1])), ha='center',va='center', fontsize=8) + draw_screen_poly(subregion[1], m, 'w') + plt.annotate(subregion[0],xy=(0.5*(subregion[1][2]+subregion[1][3]), 0.5*(subregion[1][0]+subregion[1][1])), ha='center',va='center', fontsize=8) fig.savefig(directory+'map_subregion', bbox_inches='tight') def draw_screen_poly(boundary_array, m, linecolor='k'): @@ -238,10 +240,3 @@ def draw_screen_poly(boundary_array, m, linecolor='k'): xy = zip(x,y) poly = Polygon( xy, facecolor='none',edgecolor=linecolor ) plt.gca().add_patch(poly) - - - - - - - diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index 027d2e26..ba8ef2a2 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -94,7 +94,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): # (first) dataset. We should instead make this a parameter for each # loader and Dataset objects. 
fact = data_info[0].pop('multiplying_factor', 1) - + """ Step 1: Load the datasets """ print('Loading datasets:\n{}'.format(data_info)) datasets = load_datasets_from_config(extra_opts, *data_info) @@ -262,7 +262,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name)) if metrics_name == 'Map_plot_bias_of_multiyear_climatology': - row, column = plot_info['subplots_array'] + row, column = plot_info.get('subplots_array', (1, 1)) if 'map_projection' in plot_info.keys(): Map_plot_bias_of_multiyear_climatology( reference_dataset, reference_name, target_datasets, target_names, @@ -279,7 +279,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): elif config['use_subregions']: if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' and average_each_year): - row, column = plot_info['subplots_array'] + row, column = plot_info.get('subplots_array', (1, 1)) Time_series_subregion( reference_subregion_mean, reference_name, target_subregion_mean, target_names, False, file_name, row, column, @@ -288,7 +288,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' and not average_each_year and month_start==1 and month_end==12): - row, column = plot_info['subplots_array'] + row, column = plot_info.get('subplots_array', (1, 1)) Time_series_subregion( reference_subregion_mean, reference_name, target_subregion_mean, target_names, True, From eacee1f5a2569a191d4961621d37281ccdf06017 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:19:39 -0700 Subject: [PATCH 06/16] Allow acronyms for CORDEX domains --- RCMES/run_RCMES.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index ba8ef2a2..f729f98b 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -79,7 +79,10 @@ def load_datasets_from_config(extra_opts, *loader_opts): min_lon = space_info['min_lon'] max_lon = space_info['max_lon'] else: - min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(space_info['boundary_type'][6:].replace(" ","").lower()) + domain = space_info['boundary_type'] + if domain.startswith('CORDEX '): + domain = domain.replace('CORDEX ', '').lower() + min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) # Additional arguments for the DatasetLoader extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, From 8168d2042390712f9d58051633d93c267a002756 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:21:09 -0700 Subject: [PATCH 07/16] Default values for filename_pattern in split file loader --- ocw/data_source/local.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 734b0918..896032f9 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -508,6 +508,7 @@ def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, ''' nc_files = [] if not file_list: + filename_pattern = [''] if not filename_pattern else filename_pattern for pattern in filename_pattern: nc_files.extend(glob(file_path + pattern)) else: From 368e6969ca992e29b7bf49906449c4c92a913d2f Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:33:14 -0700 Subject: [PATCH 08/16] Ignore start and end times when maximum_overlap is True --- RCMES/run_RCMES.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index f729f98b..76412998 100644 
--- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -68,8 +68,13 @@ def load_datasets_from_config(extra_opts, *loader_opts): temporal_resolution = time_info['temporal_resolution'] # Read time info -start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') -end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') +maximum_overlap_period = space_info.get('maximum_overlap_period', False) +if not maximum_overlap_period: + start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') + end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') +else: + # These values will be determined after datasets are loaded + start_time, end_time = None, None # Read space info space_info = config['space'] @@ -113,7 +118,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): """ Step 2: Subset the data for temporal and spatial domain """ # Create a Bounds object to use for subsetting -if time_info['maximum_overlap_period']: +if maximum_overlap_period: start_time, end_time = utils.get_temporal_overlap(datasets) print('Maximum overlap period') print('start_time: {}'.format(start_time)) From 3539aa2be6408140e33d82fb413089c62fcec70f Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:41:06 -0700 Subject: [PATCH 09/16] CLIMATE-925 - CORDEX Config File Template --- RCMES/CORDEX/templates/CORDEX.yaml.template | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 RCMES/CORDEX/templates/CORDEX.yaml.template diff --git a/RCMES/CORDEX/templates/CORDEX.yaml.template b/RCMES/CORDEX/templates/CORDEX.yaml.template new file mode 100644 index 00000000..daf6ec52 --- /dev/null +++ b/RCMES/CORDEX/templates/CORDEX.yaml.template @@ -0,0 +1,57 @@ +{% set domain = models_info[0].domain %} +{% set instrument = obs_info.instrument %} +{% set variable = obs_info.variable %} +{% set basename = [variable, instrument, domain, season]|join('_') %} +workdir: {{ [output_dir, domain, instrument, variable, season]|join('/') }} +output_netcdf_filename: {{ basename }}.nc + +# (RCMES will temporally subset data between month_start and month_end. +# If average_each_year is True (False), seasonal mean in each year is (not) calculated and used for metrics calculation.) 
+time: + maximum_overlap_period: True + temporal_resolution: monthly +{% if season == "winter" %} + month_start: 12 + month_end: 2 +{% elif season == "summer" %} + month_start: 6 + month_end: 8 +{% else %} + month_start: 1 + month_end: 12 +{% endif %} + average_each_year: True + +space: + boundary_type: {{ domain[:3] }} + +regrid: + regrid_on_reference: True + +datasets: + - loader_name: local_split + name: {{ instrument }} + file_path: {{ obs_info.filename }} + variable_name: {{ variable }} +{% for model_info in models_info %} + - loader_name: local_split + name: {{ model_info.model }} + file_path: {{ model_info.filename }} + variable_name: {{ variable }} + lat_name: lat + lon_name: lon +{% endfor %} + +number_of_metrics_and_plots: 2 + +metrics1: Map_plot_bias_of_multiyear_climatology + +plots1: + file_name: {{ basename }}_bias + +metrics2: Taylor_diagram_spatial_pattern_of_multiyear_climatology + +plots2: + file_name: {{ basename }}_taylor + +use_subregions: False From cb9428413baccf8128df7855467838ce1600049c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 27 Sep 2017 18:35:18 -0700 Subject: [PATCH 10/16] CLIMATE-928 - temporal_subset should trim edges of dataset times to ensure months divide evenly into years --- ocw/dataset_processor.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 2097cc42..22278927 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -75,13 +75,18 @@ def temporal_subset(target_dataset, month_start, month_end, name=target_dataset.name) if average_each_year: + new_times = new_dataset.times nmonth = len(month_index) - ntime = new_dataset.times.size + ntime = new_times.size nyear = ntime // nmonth if ntime % nmonth != 0: - raise ValueError("Number of times in dataset ({}) does not " - "divide evenly into {} year(s)." - .format(ntime, nyear)) + logger.warning("Number of times in dataset ({}) does not " + "divide evenly into {} year(s). Trimming data..." + .format(ntime, nyear)) + s_mon = new_times[0].month + e_mon = new_times[-1].month + new_times = new_times[13-s_mon:-e_mon] + nyear = new_times.size // nmonth averaged_time = [] ny, nx = target_dataset.values.shape[1:] @@ -92,7 +97,7 @@ def temporal_subset(target_dataset, month_start, month_end, center_index = int(nmonth / 2 + iyear * nmonth) if nmonth == 1: center_index = iyear - averaged_time.append(new_dataset.times[center_index]) + averaged_time.append(new_times[center_index]) averaged_values[iyear, :] = ma.average(new_dataset.values[ nmonth * iyear: nmonth * iyear + nmonth, :], axis=0) new_dataset = ds.Dataset(target_dataset.lats, @@ -253,7 +258,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, if path.contains_point([new_lons[iy, ix], new_lats[iy, ix]]) or not boundary_check: new_xy_mask[iy, ix] = 0. - + new_index = np.where(new_xy_mask == 0.) # Regrid the data on each time slice for i in range(len(target_dataset.times)): @@ -286,7 +291,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, values_false_indices = np.where(values_original.mask == False) qmdi[values_true_indices] = 1. qmdi[values_false_indices] = 0. 
- qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), + qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), (new_lons[new_index], new_lats[new_index]), method='nearest') @@ -1441,7 +1446,7 @@ def _are_bounds_contained_by_dataset(dataset, bounds): ''' lat_min, lat_max, lon_min, lon_max = dataset.spatial_boundaries() start, end = dataset.temporal_boundaries() - + errors = [] # TODO: THIS IS TERRIBLY inefficent and we need to use a geometry From 94fb8d6bc5f817009074fbc47d1ac44f2492f1be Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 02:17:01 -0700 Subject: [PATCH 11/16] Incorrect indices used for temporal subset after trimming --- ocw/dataset_processor.py | 6 ++--- ocw/utils.py | 58 +++++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 22278927..160ffb72 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -83,9 +83,9 @@ def temporal_subset(target_dataset, month_start, month_end, logger.warning("Number of times in dataset ({}) does not " "divide evenly into {} year(s). Trimming data..." .format(ntime, nyear)) - s_mon = new_times[0].month - e_mon = new_times[-1].month - new_times = new_times[13-s_mon:-e_mon] + slc = utils.trim_dataset(new_dataset) + new_dataset.values = new_dataset.values[slc] + new_times = new_times[slc] nyear = new_times.size // nmonth averaged_time = [] diff --git a/ocw/utils.py b/ocw/utils.py index 0a798b08..c2b62cfe 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -383,14 +383,34 @@ def get_temporal_overlap(dataset_array): ''' Find the maximum temporal overlap across the observation and model datasets :param dataset_array: an array of OCW datasets + :param idx: start and end indices to denote subset of months used. + :type idx: class:`tuple` ''' - start_time = [] - end_time = [] + start_times = [] + end_times = [] for dataset in dataset_array: - start_time.append(dataset.temporal_boundaries()[0]) - end_time.append(dataset.temporal_boundaries()[1]) + slc = trim_dataset(dataset) + times = dataset.times[slc] + start_time, end_time = times[0], times[-1] + start_times.append(start_time) + end_times.append(end_time) - return np.max(start_time), np.min(end_time) + return np.max(start_times), np.min(end_times) + + +def trim_dataset(dataset): + ''' Trim datasets such that first and last year of data have all 12 months + + :param dataset: Dataset object + :type dataset: :class:`dataset.Dataset + + :returns: Slice index for trimmed dataset + ''' + start_time, end_time = dataset.temporal_boundaries() + start_month = 13 if start_time.month == 1 else start_time.month + end_month = 0 if end_time.month == 12 else end_time.month + slc = slice(13 - start_month, len(dataset.times) - end_month) + return slc def calc_subregion_area_mean_and_std(dataset_array, subregions): @@ -505,33 +525,33 @@ def CORDEX_boundary(domain_name): :param domain_name: CORDEX domain name (http://www.cordex.org/) :type domain_name: :mod:'string' ''' - if domain_name == 'southamerica' or domain_name == 'SAM': + if domain_name == 'southamerica' or domain_name == 'sam': return -57.61, 18.50, 254.28 - 360., 343.02 - 360. - elif domain_name == 'centralamerica' or domain_name == 'CAM': + elif domain_name == 'centralamerica' or domain_name == 'cam': return -19.46, 34.83, 235.74 - 360., 337.78 - 360. 
- elif domain_name == 'northamerica' or domain_name == 'NAM': + elif domain_name == 'northamerica' or domain_name == 'nam': return 12.55, 75.88, 189.26 - 360., 336.74 - 360. - elif domain_name == 'europe' or domain_name == 'EUR': + elif domain_name == 'europe' or domain_name == 'eur': return 22.20, 71.84, 338.23 - 360., 64.4 - elif domain_name == 'africa' or domain_name == 'AFR': + elif domain_name == 'africa' or domain_name == 'afr': return -45.76, 42.24, 335.36 - 360., 60.28 - elif domain_name == 'southasia' or domain_name == 'WAS': + elif domain_name == 'southasia' or domain_name == 'was': return -15.23, 45.07, 19.88, 115.55 - elif domain_name == 'eastasia' or domain_name == 'EAS': + elif domain_name == 'eastasia' or domain_name == 'eas': return -0.10, 61.90, 51.59, 179.99 - elif domain_name == 'centralasia' or domain_name == 'CAS': + elif domain_name == 'centralasia' or domain_name == 'cas': return 18.34, 69.37, 11.05, 139.13 - elif domain_name == 'australasia' or domain_name == 'AUS': + elif domain_name == 'australasia' or domain_name == 'aus': return -52.36, 12.21, 89.25, 179.99 - elif domain_name == 'antartica' or domain_name == 'ANT': + elif domain_name == 'antartica' or domain_name == 'ant': return -89.48, -56.00, -179.00, 179.00 - elif domain_name == 'artic' or domain_name == 'ARC': + elif domain_name == 'artic' or domain_name == 'arc': return 46.06, 89.50, -179.00, 179.00 - elif domain_name == 'mediterranean' or domain_name == 'MED': + elif domain_name == 'mediterranean' or domain_name == 'med': return 25.63, 56.66, 339.79 - 360.00, 50.85 - elif domain_name == 'middleeastnorthafrica' or domain_name == 'MNA': + elif domain_name == 'middleeastnorthafrica' or domain_name == 'mna': return -7.00, 45.00, 333.00 - 360.00, 76.00 - elif domain_name == 'southeastasia' or domain_name == 'SEA': + elif domain_name == 'southeastasia' or domain_name == 'sea': return -15.14, 27.26, 89.26, 146.96 else: err = "Invalid CORDEX domain name" From 032c317be039565645a5c41e39415166720e1109 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 21:59:13 -0700 Subject: [PATCH 12/16] CLIMATE-930 - multiple file loader forces 2D lats and lons --- ocw/data_source/local.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 896032f9..45a57f9b 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -518,11 +518,8 @@ def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, dataset0 = load_file(nc_files[0], variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name) - if dataset0.lons.ndim == 1 and dataset0.lats.ndim == 1: - lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats) - elif dataset0.lons.ndim == 2 and dataset0.lats.ndim == 2: - lons = dataset0.lons - lats = dataset0.lats + lons = dataset0.lons + lats = dataset0.lats if mask_file: mask_dataset = load_file(mask_file, mask_variable) From 77463b9267e20ebf46e625f394eb35d194fd93f0 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 22:28:17 -0700 Subject: [PATCH 13/16] Update local_multiple_files tests for 1D lats and lons --- ocw/tests/test_local.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py index 0cca005e..4ce5a271 100644 --- a/ocw/tests/test_local.py +++ b/ocw/tests/test_local.py @@ -21,7 +21,7 @@ from urllib.request import urlretrieve except ImportError: from urllib import urlretrieve - + import datetime import unittest import os @@ 
-229,12 +229,10 @@ def test_variable_name(self): def test_function_load_dataset_from_multiple_netcdf_files_lats(self): """Test load_multiple_files function for times.""" - _, self.latitudes = np.meshgrid(self.longitudes, self.latitudes) np.testing.assert_array_equal(self.dataset.lats, self.latitudes) def test_function_load_dataset_from_multiple_netcdf_files_lons(self): """Test load_multiple_files function for times.""" - self.longitudes, _ = np.meshgrid(self.longitudes, self.latitudes) np.testing.assert_array_equal(self.dataset.lons, self.longitudes) def test_function_load_dataset_from_multiple_netcdf_files_times(self): @@ -245,12 +243,10 @@ def test_function_load_dataset_from_multiple_netcdf_files_times(self): def test_function_load_dataset_from_multiple_netcdf_files_alt_lats(self): """Test load_multiple_files function for non-default lats.""" - _, self.alt_lats = np.meshgrid(self.alt_lons, self.alt_lats) np.testing.assert_array_equal(self.alt_dataset.lats, self.alt_lats) def test_function_load_dataset_from_multiple_netcdf_files_alt_lons(self): """Test load_multiple_files function for non-default lons.""" - self.alt_lons, _ = np.meshgrid(self.alt_lons, self.alt_lats) np.testing.assert_array_equal(self.alt_dataset.lons, self.alt_lons) def test_function_load_dataset_from_multiple_netcdf_files_alt_times(self): From fb02c74925b977abd2c03f19ea0ccd9b15faec23 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 23:11:51 -0700 Subject: [PATCH 14/16] CLIMATE-927 - RCMES script for running multiple evaluations --- RCMES/CORDEX/cordex.py | 56 ++++++++++++++++ RCMES/CORDEX/metadata_extractor.py | 35 ++++++++-- RCMES/CORDEX/templates/CORDEX.yaml.template | 8 +-- RCMES/metrics_and_plots.py | 2 +- RCMES/run_RCMES.py | 24 ++++--- ocw/plotter.py | 74 ++++++++++----------- 6 files changed, 143 insertions(+), 56 deletions(-) create mode 100644 RCMES/CORDEX/cordex.py diff --git a/RCMES/CORDEX/cordex.py b/RCMES/CORDEX/cordex.py new file mode 100644 index 00000000..dc80be4e --- /dev/null +++ b/RCMES/CORDEX/cordex.py @@ -0,0 +1,56 @@ +import os +import subprocess +import jinja2 +from metadata_extractor import CORDEXMetadataExtractor, obs4MIPSMetadataExtractor + +# These should be modified. TODO: domains can also be made into separate group +# CORDEX domain +domain = 'NAM-44' + +# The output directory +workdir = '/home/goodman/data_processing/CORDEX/analysis' + +# Location of osb4Mips files +obs_dir = '/proj3/data/obs4mips' + +# Location of CORDEX files +models_dir = '/proj3/data/CORDEX/{domain}/*'.format(domain=domain) + +# Extract metadata from model and obs files, pairing up files with the same +# variables for separate evaluations +obs_extractor = obs4MIPSMetadataExtractor(obs_dir) +models_extractor = CORDEXMetadataExtractor(models_dir) +groups = obs_extractor.group(models_extractor, 'variable') + +# Configuration file template, to be rendered repeatedly for each evaluation +# run +env = jinja2.Environment(loader=jinja2.FileSystemLoader('./templates'), + trim_blocks=True, lstrip_blocks=True) +t = env.get_template('CORDEX.yaml.template') + +# Each group represents a single evaluation. Repeat the evaluation for +# three seasons: Summer, Winter, and Annual. 
+seasons = ['annual', 'winter', 'summer']
+errored = []
+for group in groups:
+    obs_info, models_info = group
+    instrument = obs_info['instrument']
+    variable = obs_info['variable']
+    for season in seasons:
+        configfile_basename = '_'.join([domain, instrument, variable, season]) + '.yaml'
+        configfile_path = os.path.join(workdir, domain, instrument,
+                                       variable, season)
+        if not os.path.exists(configfile_path):
+            os.makedirs(configfile_path)
+        configfile_path = os.path.join(configfile_path, configfile_basename)
+        with open(configfile_path, 'w') as configfile:
+            configfile.write(t.render(obs_info=obs_info, models_info=models_info,
+                                      season=season, output_dir=workdir))
+
+        # TODO: Do this in parallel. Will change this once this approach
+        # is well tested.
+        code = subprocess.call(['python', '../run_RCMES.py', configfile_path])
+        if code:
+            errored.append(configfile_path)
+
+print("All runs done. The following ended with an error: {}".format(errored))
diff --git a/RCMES/CORDEX/metadata_extractor.py b/RCMES/CORDEX/metadata_extractor.py
index cb18946b..b97d890a 100644
--- a/RCMES/CORDEX/metadata_extractor.py
+++ b/RCMES/CORDEX/metadata_extractor.py
@@ -84,6 +84,13 @@ def variables(self):
         """
         return self.get_field('variable')
 
+    @property
+    def field_filters(self):
+        """
+        Override this to filter out specific characters contained in a field.
+        """
+        return dict()
+
     def query(self, **kwargs):
         """
         Narrow down the list of files by field names.
@@ -95,7 +102,8 @@ def query(self, **kwargs):
         data = self.data
         for field, value in kwargs.items():
             value = value if isinstance(value, list) else [value]
-            data = [meta for meta in data if meta[field] in value]
+            data = [meta for meta in data
+                    if self._match_filter(meta, field) in value]
         return data
 
     def group(self, extractor, field):
@@ -116,7 +124,7 @@ def group(self, extractor, field):
 
         groups = []
         for meta in results:
-            val = meta[field]
+            val = self._match_filter(meta, field)
             kwargs.update({field: val})
             match = extractor.query(**kwargs)
             groups.append((meta, match))
@@ -151,6 +159,16 @@ def get_pattern(self, fname):
         pattern = '_'.join(base[:len(self.fields)] + ['*.nc'])
         return pattern
 
+    def _match_filter(self, meta, field):
+        """
+        Filter (ignore) certain character patterns when matching a field.
+ """ + val = meta[field] + if field in self.field_filters: + for pattern in self.field_filters[field]: + val = val.replace(pattern, '') + return val + def _extract(self): """ Do the actual metadata extraction from the list of filename given @@ -183,6 +201,13 @@ def fields(self): fields = ['variable', 'instrument', 'processing_level', 'version'] return fields + @property + def field_filters(self): + """ + Field filters for CALIPSO + """ + return dict(variable=['calipso', 'Lidarsr532']) + def filter_filename(self, fname): """ CALIPSO files have odd naming conventions, so we will use @@ -190,8 +215,6 @@ def filter_filename(self, fname): """ fname = os.path.basename(fname) fname = fname.replace('_obs4MIPs_', '_') - fname = fname.replace('calipso', '') - fname = fname.replace('Lidarsr532', '') return fname def get_pattern(self, fname): @@ -202,8 +225,8 @@ def get_pattern(self, fname): offset = -2 if len(base) != 5 else -1 pattern = '_'.join(base[:offset] + ['*.nc']) return pattern - - + + class CORDEXMetadataExtractor(MetadataExtractor): @property def models(self): diff --git a/RCMES/CORDEX/templates/CORDEX.yaml.template b/RCMES/CORDEX/templates/CORDEX.yaml.template index daf6ec52..c0a19943 100644 --- a/RCMES/CORDEX/templates/CORDEX.yaml.template +++ b/RCMES/CORDEX/templates/CORDEX.yaml.template @@ -1,6 +1,6 @@ {% set domain = models_info[0].domain %} {% set instrument = obs_info.instrument %} -{% set variable = obs_info.variable %} +{% set variable = models_info[0].variable %} {% set basename = [variable, instrument, domain, season]|join('_') %} workdir: {{ [output_dir, domain, instrument, variable, season]|join('/') }} output_netcdf_filename: {{ basename }}.nc @@ -23,7 +23,7 @@ time: average_each_year: True space: - boundary_type: {{ domain[:3] }} + boundary_type: CORDEX {{ domain[:3] }} regrid: regrid_on_reference: True @@ -32,12 +32,12 @@ datasets: - loader_name: local_split name: {{ instrument }} file_path: {{ obs_info.filename }} - variable_name: {{ variable }} + variable_name: {{ obs_info.variable }} {% for model_info in models_info %} - loader_name: local_split name: {{ model_info.model }} file_path: {{ model_info.filename }} - variable_name: {{ variable }} + variable_name: {{ model_info.variable }} lat_name: lat lon_name: lon {% endfor %} diff --git a/RCMES/metrics_and_plots.py b/RCMES/metrics_and_plots.py index 78c2f14f..0eb1f022 100644 --- a/RCMES/metrics_and_plots.py +++ b/RCMES/metrics_and_plots.py @@ -59,7 +59,7 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets string_list = list(string.ascii_lowercase) nmodels = len(model_datasets) - row, column = plotter._best_grid_shape((row, column), nmodels + 1) + row, column = plotter._best_grid_shape(nmodels + 1, (row, column)) ax = fig.add_subplot(row,column,1) if map_projection == 'npstere': m = Basemap(ax=ax, projection ='npstere', boundinglat=lat_min, lon_0=0, diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index 76412998..f9ec2c37 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -25,6 +25,11 @@ from glob import glob from getpass import getpass import numpy as np + +# Need these lines to run RCMES through SSH without X11 +import matplotlib +matplotlib.use('Agg') + import ocw.utils as utils import ocw.dataset_processor as dsp from ocw.dataset import Bounds @@ -68,7 +73,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): temporal_resolution = time_info['temporal_resolution'] # Read time info -maximum_overlap_period = space_info.get('maximum_overlap_period', False) 
+maximum_overlap_period = time_info.get('maximum_overlap_period', False) if not maximum_overlap_period: start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') @@ -86,7 +91,8 @@ def load_datasets_from_config(extra_opts, *loader_opts): else: domain = space_info['boundary_type'] if domain.startswith('CORDEX '): - domain = domain.replace('CORDEX ', '').lower() + domain = domain.replace('CORDEX', '').lower() + domain = domain.replace(' ', '') min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) # Additional arguments for the DatasetLoader @@ -110,9 +116,9 @@ def load_datasets_from_config(extra_opts, *loader_opts): multiplying_factor[0] = fact names = [dataset.name for dataset in datasets] for i, dataset in enumerate(datasets): - if temporal_resolution == 'daily' or temporal_resolution == 'monthly': - datasets[i] = dsp.normalize_dataset_datetimes(dataset, - temporal_resolution) + res = dataset.temporal_resolution() + if res == 'daily' or res == 'monthly': + datasets[i] = dsp.normalize_dataset_datetimes(dataset, res) if multiplying_factor[i] != 1: datasets[i].values *= multiplying_factor[i] @@ -148,7 +154,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): for i, dataset in enumerate(datasets): datasets[i] = dsp.subset(dataset, bounds) if dataset.temporal_resolution() != temporal_resolution: - datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) + datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution) # Temporally subset both observation and model datasets # for the user specified season @@ -269,8 +275,10 @@ def load_datasets_from_config(extra_opts, *loader_opts): file_name = workdir+plot_info['file_name'] print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name)) + default_shape = (int(np.ceil(np.sqrt(ntarget + 2))), + int(np.ceil(np.sqrt(ntarget + 2)))) if metrics_name == 'Map_plot_bias_of_multiyear_climatology': - row, column = plot_info.get('subplots_array', (1, 1)) + row, column = plot_info.get('subplots_array', default_shape) if 'map_projection' in plot_info.keys(): Map_plot_bias_of_multiyear_climatology( reference_dataset, reference_name, target_datasets, target_names, @@ -287,7 +295,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): elif config['use_subregions']: if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' and average_each_year): - row, column = plot_info.get('subplots_array', (1, 1)) + row, column = plot_info.get('subplots_array', default_shape) Time_series_subregion( reference_subregion_mean, reference_name, target_subregion_mean, target_names, False, file_name, row, column, diff --git a/ocw/plotter.py b/ocw/plotter.py index 4896a649..7f9b0920 100755 --- a/ocw/plotter.py +++ b/ocw/plotter.py @@ -80,12 +80,12 @@ def _nice_intervals(data, nlevs): else: mnlvl = mn mxlvl = mx - + # hack to make generated intervals from mpl the same for all versions autolimit_mode = mpl.rcParams.get('axes.autolimit_mode') if autolimit_mode: mpl.rc('axes', autolimit_mode='round_numbers') - + locator = mpl.ticker.MaxNLocator(nlevs) clevs = locator.tick_values(mnlvl, mxlvl) if autolimit_mode: @@ -122,14 +122,14 @@ def _best_grid_shape(nplots, oldshape): # rows and columns for gridshape, automatically # correct it so that it fits only as many plots # as needed - while diff >= ncols: - nrows -= 1 + while diff >= nrows: + ncols -= 1 size = nrows * ncols diff = size - nplots # Don't forget to remove unnecessary 
columns too - if nrows == 1: - ncols = nplots + if ncols == 1: + nrows = nplots newshape = nrows, ncols return newshape @@ -1096,24 +1096,24 @@ def draw_histogram(dataset_array, data_names, fname, fmt='png', nbins=10): def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', colors=False, values=None, region_names=None): - - ''' Fill the States over the contiguous US with colors - - :param regions: The list of subregions(lists of US States) to be filled + + ''' Fill the States over the contiguous US with colors + + :param regions: The list of subregions(lists of US States) to be filled with different colors. - :type regions: :class:`list` + :type regions: :class:`list` :param fname: The filename of the plot. :type fname: :mod:`string` :param fmt: (Optional) filetype for the output. :type fmt: :mod:`string` :param ptitle: (Optional) plot title. :type ptitle: :mod:`string` - :param colors: (Optional) : If True, each region will be filled + :param colors: (Optional) : If True, each region will be filled with different colors without using values - :type colors: :class:`bool` - :param values: (Optional) : If colors==False, color for each region scales - an associated element in values - :type values: :class:`numpy.ndarray` + :type colors: :class:`bool` + :param values: (Optional) : If colors==False, color for each region scales + an associated element in values + :type values: :class:`numpy.ndarray` ''' nregion = len(regions) @@ -1140,7 +1140,7 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', lons=np.empty(0) for shape in shapes: patches.append(Polygon(np.array(shape), True)) - + lons = np.append(lons, shape[:,0]) lats = np.append(lats, shape[:,1]) if colors: @@ -1164,13 +1164,13 @@ def draw_plot_to_compare_trends(obs_data, ens_data, model_data, fname, fmt='png', ptitle='', data_labels=None, xlabel='', ylabel=''): - ''' Fill the States over the contiguous US with colors - + ''' Fill the States over the contiguous US with colors + :param obs_data: An array of observed trend and standard errors for regions :type obs_data: :class:'numpy.ndarray' - :param ens_data: An array of trend and standard errors from a multi-model ensemble for regions + :param ens_data: An array of trend and standard errors from a multi-model ensemble for regions :type ens_data: : class:'numpy.ndarray' - :param model_data: An array of trends from models for regions + :param model_data: An array of trends from models for regions :type model_data: : class:'numpy.ndarray' :param fname: The filename of the plot. 
:type fname: :mod:`string` @@ -1182,36 +1182,36 @@ def draw_plot_to_compare_trends(obs_data, ens_data, model_data, :type data_labels: :mod:`list` :param xlabel: (Optional) a label for x-axis :type xlabel: :mod:`string` - :param ylabel: (Optional) a label for y-axis + :param ylabel: (Optional) a label for y-axis :type ylabel: :mod:`string` ''' - nregions = obs_data.shape[1] + nregions = obs_data.shape[1] # Set up the figure fig = plt.figure() fig.set_size_inches((8.5, 11.)) fig.dpi = 300 ax = fig.add_subplot(111) - - b_plot = ax.boxplot(model_data, widths=np.repeat(0.2, nregions), positions=np.arange(nregions)+1.3) + + b_plot = ax.boxplot(model_data, widths=np.repeat(0.2, nregions), positions=np.arange(nregions)+1.3) plt.setp(b_plot['medians'], color='black') plt.setp(b_plot['whiskers'], color='black') plt.setp(b_plot['boxes'], color='black') plt.setp(b_plot['fliers'], color='black') - ax.errorbar(np.arange(nregions)+0.8, obs_data[0,:], yerr=obs_data[1,:], - fmt='o', color='r', ecolor='r') - ax.errorbar(np.arange(nregions)+1., ens_data[0,:], yerr=ens_data[1,:], - fmt='o', color='b', ecolor='b') + ax.errorbar(np.arange(nregions)+0.8, obs_data[0,:], yerr=obs_data[1,:], + fmt='o', color='r', ecolor='r') + ax.errorbar(np.arange(nregions)+1., ens_data[0,:], yerr=ens_data[1,:], + fmt='o', color='b', ecolor='b') ax.set_xticks(np.arange(nregions)+1) ax.set_xlim([0, nregions+1]) - + if data_labels: ax.set_xticklabels(data_labels) - fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight') + fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight') -def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_names, - output_file, title=None, diff_plot=False, cmap = cm.BrBG, - cbar_ticks=[0.01, 0.10, 0.5, 2, 5, 25], +def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_names, + output_file, title=None, diff_plot=False, cmap = cm.BrBG, + cbar_ticks=[0.01, 0.10, 0.5, 2, 5, 25], cbar_label=['0.01', '0.10', '0.5', '2', '5', '25']): ''' :param plot_data: a numpy array of data to plot (dimY, dimX) @@ -1219,11 +1219,11 @@ def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_n :param plot_level: levels to plot :type plot_level: :class:'numpy.ndarray' :param x_ticks: x values where tick makrs are located - :type x_ticks: :class:'numpy.ndarray' + :type x_ticks: :class:'numpy.ndarray' :param x_names: labels for the ticks on x-axis (dimX) :type x_names: :class:'list' :param y_ticks: y values where tick makrs are located - :type y_ticks: :class:'numpy.ndarray' + :type y_ticks: :class:'numpy.ndarray' :param y_names: labels for the ticks on y-axis (dimY) :type y_names: :class:'list' :param output_file: name of output png file @@ -1246,7 +1246,7 @@ def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_n dimY, dimX = plot_data.shape plot_data2 = np.zeros([dimY,dimX]) # sectioned array for plotting - # fontsize + # fontsize rcParams['axes.labelsize'] = 8 rcParams['xtick.labelsize'] = 8 rcParams['ytick.labelsize'] = 8 From 58d6cdf16e52da9c326381718a80ecc33e880eb4 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 23:33:13 -0700 Subject: [PATCH 15/16] Update plotter unit tests for _best_gridshape changes --- ocw/tests/test_plotter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocw/tests/test_plotter.py b/ocw/tests/test_plotter.py index 19ba66a7..894e0c4b 100644 --- a/ocw/tests/test_plotter.py +++ b/ocw/tests/test_plotter.py @@ -51,14 +51,14 @@ class 
TestBestGridShapeFunction(unittest.TestCase): def test_returned_shape_small(self): nplots = 2 oldshape = (2, 2) - expected_shape = (1, 2) + expected_shape = (2, 1) new_shape = plotter._best_grid_shape(nplots, oldshape) self.assertEqual(new_shape, expected_shape) def test_returned_shape_large(self): nplots = 57 - oldshape = (220, 12) - expected_shape = (5, 12) + oldshape = (12, 220) + expected_shape = (12, 5) new_shape = plotter._best_grid_shape(nplots, oldshape) self.assertEqual(new_shape, expected_shape) From e7d99b6ccf00b2f406a3b93217f8512c3dfe2184 Mon Sep 17 00:00:00 2001 From: huikyole Date: Sun, 22 Oct 2017 19:34:40 -0700 Subject: [PATCH 16/16] CLIMATE-932 - A whitespace error in running the CORDEX script - RCMES/run_RCMES.py has been fixed --- RCMES/run_RCMES.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index f9ec2c37..3c7291e4 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -90,11 +90,12 @@ def load_datasets_from_config(extra_opts, *loader_opts): max_lon = space_info['max_lon'] else: domain = space_info['boundary_type'] - if domain.startswith('CORDEX '): + if 'CORDEX' in domain: domain = domain.replace('CORDEX', '').lower() - domain = domain.replace(' ', '') + domain = domain.strip() min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) + # Additional arguments for the DatasetLoader extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, 'max_lon': max_lon, 'start_time': start_time,
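Taken together, the series wires the metadata extractors into a batch evaluation driver. A minimal sketch of the grouping step that cordex.py is built around (the directory paths are the placeholders used in that script, and the import assumes the working directory is RCMES/CORDEX):

    from metadata_extractor import CORDEXMetadataExtractor, obs4MIPSMetadataExtractor

    # Pair each obs4MIPs dataset with the CORDEX model runs that share its
    # variable; each (obs, models) pair drives one evaluation per season.
    obs_extractor = obs4MIPSMetadataExtractor('/proj3/data/obs4mips')
    models_extractor = CORDEXMetadataExtractor('/proj3/data/CORDEX/NAM-44/*')
    for obs_info, models_info in obs_extractor.group(models_extractor, 'variable'):
        print(obs_info['variable'], len(models_info), 'matching model datasets')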