From 8217d12f06987d852f9294da94a5af243116e751 Mon Sep 17 00:00:00 2001
From: Alex Goodman
Date: Mon, 25 Sep 2017 10:35:20 -0700
Subject: [PATCH 01/16] CLIMATE-926 - Metadata Extractors

---
 RCMES/CORDEX/metadata_extractor.py | 222 +++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)
 create mode 100644 RCMES/CORDEX/metadata_extractor.py

diff --git a/RCMES/CORDEX/metadata_extractor.py b/RCMES/CORDEX/metadata_extractor.py
new file mode 100644
index 00000000..7351cf4f
--- /dev/null
+++ b/RCMES/CORDEX/metadata_extractor.py
@@ -0,0 +1,222 @@
+import glob
+import os
+
+
+class MetadataExtractor(object):
+    def __init__(self, *paths):
+        """Extracts metadata from data filenames.
+
+        Instances of MetadataExtractor are used to extract metadata from
+        filenames in bulk. Example usage:
+        >>> extractor = MetadataExtractor('/path/to/data')
+
+        Suppose the data in this directory had the following files:
+        pr_*.nc, uas_*.nc, vas_*.nc
+
+        All of the metadata lies in the data attribute:
+        >>> extractor.data
+        [{'filename': '/path/to/data/pr_*.nc', 'variable': 'pr'},
+         {'filename': '/path/to/data/vas_*.nc', 'variable': 'vas'},
+         {'filename': '/path/to/data/uas_*.nc', 'variable': 'uas'}]
+
+        Results can be narrowed down by specifying values for a field:
+        >>> extractor.query(variable='pr')
+        [{'filename': '/path/to/data/pr_*.nc', 'variable': 'pr'}]
+
+        Finally, metadata from two sets of extractors can be grouped together
+        based on a common field name as follows:
+        >>> extractor.group(extractor2, 'variable')
+
+        This class should only be used as a starting point. We recommend using
+        the included obs4MIPSMetadataExtractor and CORDEXMetadataExtractor
+        subclasses or creating your own subclass for your use case.
+        """
+        self.paths = paths
+
+    @property
+    def data(self):
+        """
+        The extracted metadata for each file, with all fields listed in
+        the fields attribute included.
+        """
+        return self._data
+
+    @property
+    def paths(self):
+        """
+        Search paths containing the dataset files.
+        """
+        return self._paths
+
+    @paths.setter
+    def paths(self, paths):
+        """
+        Extracts the metadata from scratch when paths are reset.
+        """
+        self._paths = paths
+        self._extract()
+
+    @property
+    def fields(self):
+        """
+        The names of the fields in the filename, assuming the fully filtered
+        filename conforms to the following convention:
+        filename = <field1>_<field2>_..._<time_period>.nc. Using fewer fields
+        than the filename defines is allowed.
+        """
+        fields = ['variable']
+        return fields
+
+    @property
+    def files(self):
+        """
+        List of files (or regular expressions) for each dataset.
+        """
+        files = []
+        for path in self.paths:
+            files.extend(glob.glob(os.path.join(path, '*.nc')))
+        return list(set(self.get_pattern(fname) for fname in files))
+
+    @property
+    def variables(self):
+        """
+        Get the list of variables included across all the datasets.
+        """
+        return self.get_field('variable')
+
+    def query(self, **kwargs):
+        """
+        Narrow down the list of files by field names.
+        """
+        fields = kwargs.keys()
+        if not set(fields).issubset(set(self.fields)):
+            raise ValueError("Invalid fields: {}. Must be subset of: {}"
+                             .format(fields, self.fields))
+        data = self.data
+        for field, value in kwargs.items():
+            value = value if isinstance(value, list) else [value]
+            data = [meta for meta in data if meta[field] in value]
+        return data
+
+    def group(self, extractor, field):
+        """
+        Compare the data of this extractor with another extractor instance
+        and group each of their metadata together by the given field.
+        """
+        # First we only want to consider values of field which are contained
+        # in both extractors
+        subset = self.get_field(field)
+        other_subset = extractor.get_field(field)
+        intersection = list(subset.intersection(other_subset))
+
+        # Next we will group the datasets in each extractor together by common
+        # field values
+        kwargs = {field: intersection}
+        results = self.query(**kwargs)
+
+        groups = []
+        for meta in results:
+            val = meta[field]
+            kwargs.update({field: val})
+            match = extractor.query(**kwargs)
+            groups.append((meta, match))
+
+        return groups
+
+    def get_field(self, field):
+        """
+        Returns only the selected field of the extracted data.
+        """
+        if field not in self.fields:
+            raise ValueError("Invalid field: {}. Must be one of: {}"
+                             .format(field, self.fields))
+        sub = set(meta[field] for meta in self.data)
+        return sub
+
+    def filter_filename(self, fname):
+        """
+        Applies a filter to each individual filename given by the files
+        property, which is useful for "fixing" files within a dataset that
+        are known to not follow conventions so that they do.
+        """
+        return os.path.basename(fname)
+
+    def get_pattern(self, fname):
+        """
+        Used to group multiple file datasets together via regular expressions.
+        The most common convention is to split files by time periods, which
+        are generally the last field in a filename.
+        """
+        base = fname.split('_')
+        pattern = '_'.join(base[:len(self.fields)] + ['*.nc'])
+        return pattern
+
+    def _extract(self):
+        """
+        Do the actual metadata extraction from the list of filename given
+        by the files property. Additionally, filenames can also be filtered
+        via filter_filename() to remove unwanted characters from the extraction.
+        """
+        self._data = []
+        for fname in self.files:
+            meta = dict(filename=fname)
+
+            # Perform the actual metadata extraction
+            fname = self.filter_filename(fname)
+            meta.update(dict(zip(self.fields, fname.split('_')[:-1])))
+            self._data.append(meta)
+
+
+class obs4MIPSMetadataExtractor(MetadataExtractor):
+    @property
+    def instruments(self):
+        """
+        Get the list of instruments across all the datasets.
+        """
+        return self.get_field('instrument')
+
+    @property
+    def fields(self):
+        """
+        obs4MIPs fields
+        """
+        fields = ['variable', 'instrument', 'processing_level', 'version']
+        return fields
+
+    def filter_filename(self, fname):
+        """
+        CALIPSO files have odd naming conventions, so we will use
+        a modified version to conform to standard obs4MIPs conventions.
+        """
+        fname = os.path.basename(fname)
+        fname = fname.replace('_obs4MIPs_', '_')
+        fname = fname.replace('calipso', '')
+        fname = fname.replace('Lidarsr532', '')
+        return fname
+
+    def get_pattern(self, fname):
+        """
+        Overridden to deal with CALIPSO filenames
+        """
+        base = fname.split('_')
+        offset = -2 if len(base) != 5 else -1
+        pattern = '_'.join(base[:offset] + ['*.nc'])
+        return pattern
+
+
+class CORDEXMetadataExtractor(MetadataExtractor):
+    @property
+    def models(self):
+        """
+        Get the list of models across all the datasets.
+        """
+        return self.get_field('models')
+
+    @property
+    def fields(self):
+        """
+        CORDEX fields
+        """
+        fields = ['variable', 'domain', 'driving_model', 'experiment',
+                  'ensemble', 'model', 'version', 'time_step']
+        return fields

From ec81037a21dbf2cf1999422127bbe924e1072c4a Mon Sep 17 00:00:00 2001
From: Alex Goodman
Date: Mon, 25 Sep 2017 10:41:07 -0700
Subject: [PATCH 02/16] Fix model attribute in CORDEXMetadataExtractor

---
 RCMES/CORDEX/metadata_extractor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/RCMES/CORDEX/metadata_extractor.py b/RCMES/CORDEX/metadata_extractor.py
index 7351cf4f..cb18946b 100644
--- a/RCMES/CORDEX/metadata_extractor.py
+++ b/RCMES/CORDEX/metadata_extractor.py
@@ -210,7 +210,7 @@ def models(self):
         """
         Get the list of models across all the datasets.
         """
-        return self.get_field('models')
+        return self.get_field('model')
 
     @property
     def fields(self):

From ba93542a75172b98f556747e520a0239ca8e6012 Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 26 Sep 2017 13:38:30 -0700
Subject: [PATCH 03/16] CLIMATE-919 - load_dataset_from_multiple_netcdf_files()
 does not have default variable units

---
 ocw/data_source/local.py | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py
index be23bb26..734b0918 100644
--- a/ocw/data_source/local.py
+++ b/ocw/data_source/local.py
@@ -170,7 +170,7 @@ def load_WRF_2d_files(file_path=None,
             values0 = file_object.variables[variable_name][:]
             if isinstance(values0, numpy.ndarray):
                 values0 = ma.array(values0,
-                                  mask=numpy.zeros(values0.shape))
+                                   mask=numpy.zeros(values0.shape))
             if ifile == 0:
                 values = values0
                 variable_unit = file_object.variables[variable_name].units
@@ -260,7 +260,11 @@ def load_file(file_path,
         times = utils.decode_time_values(netcdf, time_name)
         times = numpy.array(times)
     values = ma.array(netcdf.variables[variable_name][:])
-    variable_unit = netcdf.variables[variable_name].units
+    if not variable_unit:
+        if hasattr(netcdf.variables[variable_name], 'units'):
+            variable_unit = netcdf.variables[variable_name].units
+        else:
+            variable_unit = '1'
 
     # If the values are 4D then we need to strip out the elevation index
     if len(values.shape) == 4:
@@ -314,7 +318,7 @@ def load_multiple_files(file_path,
     :type file_path: :mod:`string`
     :param dataset_name: a list of dataset names. Data filenames must include the elements of dataset_name list.
     :type dataset_name: :mod:'list'
     :param generic_dataset_name: If false, data filenames must include the elements of dataset_name list.
     :type generic_dataset_name: :mod:'bool'
     :param variable_name: The variable name to load from the NetCDF file.
:type variable_name: :mod:`string` @@ -356,11 +360,11 @@ def load_multiple_files(file_path, lat_name=lat_name, lon_name=lon_name, time_name=time_name)) else: - data_name = [i for i in dataset_name] + data_name = [i for i in dataset_name] pattern = [['*'+i+'*'] for i in dataset_name] if file_path[-1] != '/': file_path = file_path+'/' - + ndata = len(dataset_name) for idata in numpy.arange(ndata): datasets.append(load_dataset_from_multiple_netcdf_files(variable_name, @@ -536,7 +540,8 @@ def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, else: data_values = numpy.concatenate((data_values, values0)) times = numpy.array(times) - return Dataset(lats, lons, times, data_values, + variable_unit = dataset0.units if not variable_unit else variable_unit + return Dataset(lats, lons, times, data_values, variable_name, units=variable_unit,name=name) @@ -674,7 +679,7 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, filename_pattern=None, filelist=None, variable_name='precipitationCal', - user_mask_file=None, + user_mask_file=None, mask_variable_name='mask', user_mask_values=[10], longitude_name='lon', @@ -694,11 +699,11 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, :type name: :mod:`string` :user_mask_file: user's own gridded mask file(a netCDF file name) :type name: :mod:`string` - :mask_variable_name: mask variables in user_mask_file + :mask_variable_name: mask variables in user_mask_file :type name: :mod:`string` - :longitude_name: longitude variable in user_mask_file + :longitude_name: longitude variable in user_mask_file :type name: :mod:`string` - :latitude_name: latitude variable in user_mask_file + :latitude_name: latitude variable in user_mask_file :type name: :mod:`string` :param user_mask_values: grid points where mask_variable == user_mask_value will be extracted. :type user_mask_values: list of strings @@ -731,7 +736,7 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, mask_longitude = file_object.variables[longitude_name][:] mask_latitude = file_object.variables[latitude_name][:] spatial_mask = utils.regrid_spatial_mask(lons,lats, - mask_longitude, mask_latitude, + mask_longitude, mask_latitude, mask_variable, user_mask_values) y_index, x_index = numpy.where(spatial_mask == 0) @@ -739,7 +744,7 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, file_object = h5py.File(file) values0 = ma.transpose(ma.masked_less( file_object['Grid'][variable_name][:], 0.)) - values_masked = values0[y_index, x_index] + values_masked = values0[y_index, x_index] values_masked = ma.expand_dims(values_masked, axis=0) if ifile == 0: values = values_masked @@ -747,5 +752,3 @@ def load_GPM_IMERG_files_with_spatial_filter(file_path=None, values = ma.concatenate((values, values_masked)) file_object.close() return values - - From d868be10eb3862798f456e45e5ae2d3687dd401c Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:03:09 -0700 Subject: [PATCH 04/16] Allow acronyms for CORDEX domains --- ocw/utils.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/ocw/utils.py b/ocw/utils.py index db4b9268..0a798b08 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -505,33 +505,33 @@ def CORDEX_boundary(domain_name): :param domain_name: CORDEX domain name (http://www.cordex.org/) :type domain_name: :mod:'string' ''' - if domain_name == 'southamerica': + if domain_name == 'southamerica' or domain_name == 'SAM': return -57.61, 18.50, 254.28 - 360., 343.02 - 360. 
- elif domain_name == 'centralamerica': + elif domain_name == 'centralamerica' or domain_name == 'CAM': return -19.46, 34.83, 235.74 - 360., 337.78 - 360. - elif domain_name == 'northamerica': + elif domain_name == 'northamerica' or domain_name == 'NAM': return 12.55, 75.88, 189.26 - 360., 336.74 - 360. - elif domain_name == 'europe': + elif domain_name == 'europe' or domain_name == 'EUR': return 22.20, 71.84, 338.23 - 360., 64.4 - elif domain_name == 'africa': + elif domain_name == 'africa' or domain_name == 'AFR': return -45.76, 42.24, 335.36 - 360., 60.28 - elif domain_name == 'southasia': + elif domain_name == 'southasia' or domain_name == 'WAS': return -15.23, 45.07, 19.88, 115.55 - elif domain_name == 'eastasia': + elif domain_name == 'eastasia' or domain_name == 'EAS': return -0.10, 61.90, 51.59, 179.99 - elif domain_name == 'centralasia': + elif domain_name == 'centralasia' or domain_name == 'CAS': return 18.34, 69.37, 11.05, 139.13 - elif domain_name == 'australasia': + elif domain_name == 'australasia' or domain_name == 'AUS': return -52.36, 12.21, 89.25, 179.99 - elif domain_name == 'antartica': + elif domain_name == 'antartica' or domain_name == 'ANT': return -89.48, -56.00, -179.00, 179.00 - elif domain_name == 'artic': + elif domain_name == 'artic' or domain_name == 'ARC': return 46.06, 89.50, -179.00, 179.00 - elif domain_name == 'mediterranean': + elif domain_name == 'mediterranean' or domain_name == 'MED': return 25.63, 56.66, 339.79 - 360.00, 50.85 - elif domain_name == 'middleeastnorthafrica': + elif domain_name == 'middleeastnorthafrica' or domain_name == 'MNA': return -7.00, 45.00, 333.00 - 360.00, 76.00 - elif domain_name == 'southeastasia': + elif domain_name == 'southeastasia' or domain_name == 'SEA': return -15.14, 27.26, 89.26, 146.96 else: err = "Invalid CORDEX domain name" @@ -627,7 +627,7 @@ def _force_unicode(s, encoding='utf-8'): ''' if hasattr(s, 'decode'): s = s.decode(encoding=encoding) - + return s def calculate_temporal_trends(dataset): @@ -649,7 +649,7 @@ def calculate_temporal_trends(dataset): if dataset.values[:,iy,ix].count() == nt: trend[iy,ix], slope_err[iy,ix] = calculate_temporal_trend_of_time_series( x, dataset.values[:,iy,ix]) - + return ma.masked_equal(trend, -999.), ma.masked_equal(slope_err, -999.) 
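A quick sketch of the acronym lookup added above (a minimal usage example; at this stage of the series the acronyms are matched in uppercase, exactly as written in the branches, and PATCH 11 later switches these comparisons to lowercase):

    from ocw.utils import CORDEX_boundary

    # Full domain names and their acronyms now resolve to the same boundary,
    # returned as (min_lat, max_lat, min_lon, max_lon).
    assert CORDEX_boundary('northamerica') == CORDEX_boundary('NAM')
    min_lat, max_lat, min_lon, max_lon = CORDEX_boundary('EUR')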
def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000): @@ -663,20 +663,20 @@ def calculate_ensemble_temporal_trends(timeseries_array, number_of_samples=1000) :returns: temporal trend and estimated error from bootstrapping :rtype: :float:`float','float' ''' - + nmodels, nt = timeseries_array.shape x = np.arange(nt) sampled_trend = np.zeros(number_of_samples) ensemble_trend, _ = calculate_temporal_trend_of_time_series( x, np.mean(timeseries_array, axis=0)) - + for isample in np.arange(number_of_samples): index = np.random.choice(nmodels, size=nmodels, replace=True) random_ensemble = np.mean(timeseries_array[index, :], axis=0) sampled_trend[isample], _ = calculate_temporal_trend_of_time_series( x, random_ensemble) return ensemble_trend, np.std(sampled_trend, ddof=1) - + def calculate_temporal_trend_of_time_series(x,y): ''' Calculate least-square trends (a) in y = ax+b and a's standard error From 3e66d94fce1935aac2a8cc88025d50858d78f3ba Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:10:44 -0700 Subject: [PATCH 05/16] Default vales for subplot grid in RCMES script --- RCMES/metrics_and_plots.py | 65 ++++++++++++++++++-------------------- RCMES/run_RCMES.py | 8 ++--- 2 files changed, 34 insertions(+), 39 deletions(-) diff --git a/RCMES/metrics_and_plots.py b/RCMES/metrics_and_plots.py index 848af9de..78c2f14f 100644 --- a/RCMES/metrics_and_plots.py +++ b/RCMES/metrics_and_plots.py @@ -27,7 +27,7 @@ import numpy as np import numpy.ma as ma import matplotlib.pyplot as plt -from mpl_toolkits.basemap import Basemap +from mpl_toolkits.basemap import Basemap from matplotlib import rcParams from matplotlib.patches import Polygon import string @@ -46,7 +46,7 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets model_datasets, # list of target datasets for the evaluation [map_of_bias, map_of_bias]) # run the evaluation (bias calculation) - bias_evaluation.run() + bias_evaluation.run() rcm_bias = bias_evaluation.results[0] @@ -57,7 +57,9 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets lon_min = obs_dataset.lons.min() lon_max = obs_dataset.lons.max() - string_list = list(string.ascii_lowercase) + string_list = list(string.ascii_lowercase) + nmodels = len(model_datasets) + row, column = plotter._best_grid_shape((row, column), nmodels + 1) ax = fig.add_subplot(row,column,1) if map_projection == 'npstere': m = Basemap(ax=ax, projection ='npstere', boundinglat=lat_min, lon_0=0, @@ -79,9 +81,9 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets max = m.contourf(x,y,obs_clim,levels = plotter._nice_intervals(obs_dataset.values, 10), extend='both',cmap='rainbow') ax.annotate('(a) \n' + obs_name,xy=(lon_min, lat_min)) cax = fig.add_axes([0.02, 1.-float(1./row)+1./row*0.25, 0.01, 1./row*0.5]) - plt.colorbar(max, cax = cax) + plt.colorbar(max, cax = cax) clevs = plotter._nice_intervals(rcm_bias, 11) - for imodel in np.arange(len(model_datasets)): + for imodel in np.arange(nmodels): ax = fig.add_subplot(row, column,2+imodel) if map_projection == 'npstere': @@ -97,7 +99,7 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets ax.annotate('('+string_list[imodel+1]+') \n '+model_names[imodel],xy=(lon_min, lat_min)) cax = fig.add_axes([0.91, 0.5, 0.015, 0.4]) - plt.colorbar(max, cax = cax) + plt.colorbar(max, cax = cax) plt.subplots_adjust(hspace=0.01,wspace=0.05) @@ -122,16 +124,16 @@ def 
Taylor_diagram_spatial_pattern_of_multiyear_climatology(obs_dataset, obs_nam [taylor_diagram]) # run the evaluation (bias calculation) - taylor_evaluation.run() + taylor_evaluation.run() taylor_data = taylor_evaluation.results[0] plotter.draw_taylor_diagram(taylor_data, model_names, obs_name, file_name, pos='upper right',frameon=False) -def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, model_names, seasonal_cycle, +def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, model_names, seasonal_cycle, file_name, row, column, x_tick=['']): - nmodel, nt, nregion = model_subregion_mean.shape + nmodel, nt, nregion = model_subregion_mean.shape if seasonal_cycle: obs_data = ma.mean(obs_subregion_mean.reshape([1,nt/12,12,nregion]), axis=1) @@ -140,19 +142,19 @@ def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, mo else: obs_data = obs_subregion_mean model_data = model_subregion_mean - + x_axis = np.arange(nt) x_tick_values = x_axis fig = plt.figure() rcParams['xtick.labelsize'] = 6 rcParams['ytick.labelsize'] = 6 - + for iregion in np.arange(nregion): - ax = fig.add_subplot(row, column, iregion+1) + ax = fig.add_subplot(row, column, iregion+1) x_tick_labels = [''] if iregion+1 > column*(row-1): - x_tick_labels = x_tick + x_tick_labels = x_tick else: x_tick_labels=[''] ax.plot(x_axis, obs_data[0, :, iregion], color='r', lw=2, label=obs_name) @@ -162,8 +164,8 @@ def Time_series_subregion(obs_subregion_mean, obs_name, model_subregion_mean, mo ax.set_xticks(x_tick_values) ax.set_xticklabels(x_tick_labels) ax.set_title('Region %02d' % (iregion+1), fontsize=8) - - ax.legend(bbox_to_anchor=(-0.2, row/2), loc='center' , prop={'size':7}, frameon=False) + + ax.legend(bbox_to_anchor=(-0.2, row/2), loc='center' , prop={'size':7}, frameon=False) fig.subplots_adjust(hspace=0.7, wspace=0.5) fig.savefig(file_name, dpi=600, bbox_inches='tight') @@ -172,7 +174,7 @@ def Portrait_diagram_subregion(obs_subregion_mean, obs_name, model_subregion_mea file_name, normalize=True): nmodel, nt, nregion = model_subregion_mean.shape - + if seasonal_cycle: obs_data = ma.mean(obs_subregion_mean.reshape([1,nt/12,12,nregion]), axis=1) model_data = ma.mean(model_subregion_mean.reshape([nmodel,nt/12,12,nregion]), axis=1) @@ -193,35 +195,35 @@ def Portrait_diagram_subregion(obs_subregion_mean, obs_name, model_subregion_mea subregion_metrics[2, iregion, imodel] = metrics.calc_rmse(model_data[imodel, :, iregion], obs_data[0, :, iregion]) # Fourth metric: correlation subregion_metrics[3, iregion, imodel] = metrics.calc_correlation(model_data[imodel, :, iregion], obs_data[0, :, iregion]) - + if normalize: for iregion in np.arange(nregion): - subregion_metrics[0, iregion, : ] = subregion_metrics[0, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. - subregion_metrics[1, iregion, : ] = subregion_metrics[1, iregion, : ]*100. - subregion_metrics[2, iregion, : ] = subregion_metrics[2, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. + subregion_metrics[0, iregion, : ] = subregion_metrics[0, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. + subregion_metrics[1, iregion, : ] = subregion_metrics[1, iregion, : ]*100. + subregion_metrics[2, iregion, : ] = subregion_metrics[2, iregion, : ]/ma.std(obs_data[0, :, iregion])*100. 
region_names = ['R%02d' % i for i in np.arange(nregion)+1] for imetric, metric in enumerate(['bias','std','RMSE','corr']): - plotter.draw_portrait_diagram(subregion_metrics[imetric, :, :], region_names, model_names, file_name+'_'+metric, - xlabel='model',ylabel='region') + plotter.draw_portrait_diagram(subregion_metrics[imetric, :, :], region_names, model_names, file_name+'_'+metric, + xlabel='model',ylabel='region') def Map_plot_subregion(subregions, ref_dataset, directory): - - lons, lats = np.meshgrid(ref_dataset.lons, ref_dataset.lats) + + lons, lats = np.meshgrid(ref_dataset.lons, ref_dataset.lats) fig = plt.figure() ax = fig.add_subplot(111) m = Basemap(ax=ax, projection='cyl',llcrnrlat = lats.min(), urcrnrlat = lats.max(), llcrnrlon = lons.min(), urcrnrlon = lons.max(), resolution = 'l') m.drawcoastlines(linewidth=0.75) m.drawcountries(linewidth=0.75) - m.etopo() - x, y = m(lons, lats) + m.etopo() + x, y = m(lons, lats) #subregion_array = ma.masked_equal(subregion_array, 0) #max=m.contourf(x, y, subregion_array, alpha=0.7, cmap='Accent') for subregion in subregions: - draw_screen_poly(subregion[1], m, 'w') - plt.annotate(subregion[0],xy=(0.5*(subregion[1][2]+subregion[1][3]), 0.5*(subregion[1][0]+subregion[1][1])), ha='center',va='center', fontsize=8) + draw_screen_poly(subregion[1], m, 'w') + plt.annotate(subregion[0],xy=(0.5*(subregion[1][2]+subregion[1][3]), 0.5*(subregion[1][0]+subregion[1][1])), ha='center',va='center', fontsize=8) fig.savefig(directory+'map_subregion', bbox_inches='tight') def draw_screen_poly(boundary_array, m, linecolor='k'): @@ -238,10 +240,3 @@ def draw_screen_poly(boundary_array, m, linecolor='k'): xy = zip(x,y) poly = Polygon( xy, facecolor='none',edgecolor=linecolor ) plt.gca().add_patch(poly) - - - - - - - diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index 027d2e26..ba8ef2a2 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -94,7 +94,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): # (first) dataset. We should instead make this a parameter for each # loader and Dataset objects. 
fact = data_info[0].pop('multiplying_factor', 1) - + """ Step 1: Load the datasets """ print('Loading datasets:\n{}'.format(data_info)) datasets = load_datasets_from_config(extra_opts, *data_info) @@ -262,7 +262,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name)) if metrics_name == 'Map_plot_bias_of_multiyear_climatology': - row, column = plot_info['subplots_array'] + row, column = plot_info.get('subplots_array', (1, 1)) if 'map_projection' in plot_info.keys(): Map_plot_bias_of_multiyear_climatology( reference_dataset, reference_name, target_datasets, target_names, @@ -279,7 +279,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): elif config['use_subregions']: if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' and average_each_year): - row, column = plot_info['subplots_array'] + row, column = plot_info.get('subplots_array', (1, 1)) Time_series_subregion( reference_subregion_mean, reference_name, target_subregion_mean, target_names, False, file_name, row, column, @@ -288,7 +288,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): if (metrics_name == 'Timeseries_plot_subregion_annual_cycle' and not average_each_year and month_start==1 and month_end==12): - row, column = plot_info['subplots_array'] + row, column = plot_info.get('subplots_array', (1, 1)) Time_series_subregion( reference_subregion_mean, reference_name, target_subregion_mean, target_names, True, From eacee1f5a2569a191d4961621d37281ccdf06017 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:19:39 -0700 Subject: [PATCH 06/16] Allow acronyms for CORDEX domains --- RCMES/run_RCMES.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index ba8ef2a2..f729f98b 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -79,7 +79,10 @@ def load_datasets_from_config(extra_opts, *loader_opts): min_lon = space_info['min_lon'] max_lon = space_info['max_lon'] else: - min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(space_info['boundary_type'][6:].replace(" ","").lower()) + domain = space_info['boundary_type'] + if domain.startswith('CORDEX '): + domain = domain.replace('CORDEX ', '').lower() + min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) # Additional arguments for the DatasetLoader extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, From 8168d2042390712f9d58051633d93c267a002756 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:21:09 -0700 Subject: [PATCH 07/16] Default values for filename_pattern in split file loader --- ocw/data_source/local.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 734b0918..896032f9 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -508,6 +508,7 @@ def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, ''' nc_files = [] if not file_list: + filename_pattern = [''] if not filename_pattern else filename_pattern for pattern in filename_pattern: nc_files.extend(glob(file_path + pattern)) else: From 368e6969ca992e29b7bf49906449c4c92a913d2f Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:33:14 -0700 Subject: [PATCH 08/16] Ignore start and end times when maximum_overlap is True --- RCMES/run_RCMES.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index f729f98b..76412998 100644 
--- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -68,8 +68,13 @@ def load_datasets_from_config(extra_opts, *loader_opts): temporal_resolution = time_info['temporal_resolution'] # Read time info -start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') -end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') +maximum_overlap_period = space_info.get('maximum_overlap_period', False) +if not maximum_overlap_period: + start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') + end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') +else: + # These values will be determined after datasets are loaded + start_time, end_time = None, None # Read space info space_info = config['space'] @@ -113,7 +118,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): """ Step 2: Subset the data for temporal and spatial domain """ # Create a Bounds object to use for subsetting -if time_info['maximum_overlap_period']: +if maximum_overlap_period: start_time, end_time = utils.get_temporal_overlap(datasets) print('Maximum overlap period') print('start_time: {}'.format(start_time)) From 3539aa2be6408140e33d82fb413089c62fcec70f Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 26 Sep 2017 16:41:06 -0700 Subject: [PATCH 09/16] CLIMATE-925 - CORDEX Config File Template --- RCMES/CORDEX/templates/CORDEX.yaml.template | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 RCMES/CORDEX/templates/CORDEX.yaml.template diff --git a/RCMES/CORDEX/templates/CORDEX.yaml.template b/RCMES/CORDEX/templates/CORDEX.yaml.template new file mode 100644 index 00000000..daf6ec52 --- /dev/null +++ b/RCMES/CORDEX/templates/CORDEX.yaml.template @@ -0,0 +1,57 @@ +{% set domain = models_info[0].domain %} +{% set instrument = obs_info.instrument %} +{% set variable = obs_info.variable %} +{% set basename = [variable, instrument, domain, season]|join('_') %} +workdir: {{ [output_dir, domain, instrument, variable, season]|join('/') }} +output_netcdf_filename: {{ basename }}.nc + +# (RCMES will temporally subset data between month_start and month_end. +# If average_each_year is True (False), seasonal mean in each year is (not) calculated and used for metrics calculation.) 
+time: + maximum_overlap_period: True + temporal_resolution: monthly +{% if season == "winter" %} + month_start: 12 + month_end: 2 +{% elif season == "summer" %} + month_start: 6 + month_end: 8 +{% else %} + month_start: 1 + month_end: 12 +{% endif %} + average_each_year: True + +space: + boundary_type: {{ domain[:3] }} + +regrid: + regrid_on_reference: True + +datasets: + - loader_name: local_split + name: {{ instrument }} + file_path: {{ obs_info.filename }} + variable_name: {{ variable }} +{% for model_info in models_info %} + - loader_name: local_split + name: {{ model_info.model }} + file_path: {{ model_info.filename }} + variable_name: {{ variable }} + lat_name: lat + lon_name: lon +{% endfor %} + +number_of_metrics_and_plots: 2 + +metrics1: Map_plot_bias_of_multiyear_climatology + +plots1: + file_name: {{ basename }}_bias + +metrics2: Taylor_diagram_spatial_pattern_of_multiyear_climatology + +plots2: + file_name: {{ basename }}_taylor + +use_subregions: False From cb9428413baccf8128df7855467838ce1600049c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 27 Sep 2017 18:35:18 -0700 Subject: [PATCH 10/16] CLIMATE-928 - temporal_subset should trim edges of dataset times to ensure months divide evenly into years --- ocw/dataset_processor.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 2097cc42..22278927 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -75,13 +75,18 @@ def temporal_subset(target_dataset, month_start, month_end, name=target_dataset.name) if average_each_year: + new_times = new_dataset.times nmonth = len(month_index) - ntime = new_dataset.times.size + ntime = new_times.size nyear = ntime // nmonth if ntime % nmonth != 0: - raise ValueError("Number of times in dataset ({}) does not " - "divide evenly into {} year(s)." - .format(ntime, nyear)) + logger.warning("Number of times in dataset ({}) does not " + "divide evenly into {} year(s). Trimming data..." + .format(ntime, nyear)) + s_mon = new_times[0].month + e_mon = new_times[-1].month + new_times = new_times[13-s_mon:-e_mon] + nyear = new_times.size // nmonth averaged_time = [] ny, nx = target_dataset.values.shape[1:] @@ -92,7 +97,7 @@ def temporal_subset(target_dataset, month_start, month_end, center_index = int(nmonth / 2 + iyear * nmonth) if nmonth == 1: center_index = iyear - averaged_time.append(new_dataset.times[center_index]) + averaged_time.append(new_times[center_index]) averaged_values[iyear, :] = ma.average(new_dataset.values[ nmonth * iyear: nmonth * iyear + nmonth, :], axis=0) new_dataset = ds.Dataset(target_dataset.lats, @@ -253,7 +258,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, if path.contains_point([new_lons[iy, ix], new_lats[iy, ix]]) or not boundary_check: new_xy_mask[iy, ix] = 0. - + new_index = np.where(new_xy_mask == 0.) # Regrid the data on each time slice for i in range(len(target_dataset.times)): @@ -286,7 +291,7 @@ def spatial_regrid(target_dataset, new_latitudes, new_longitudes, values_false_indices = np.where(values_original.mask == False) qmdi[values_true_indices] = 1. qmdi[values_false_indices] = 0. 
- qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), + qmdi_r = griddata((lons.flatten(), lats.flatten()), qmdi.flatten(), (new_lons[new_index], new_lats[new_index]), method='nearest') @@ -1441,7 +1446,7 @@ def _are_bounds_contained_by_dataset(dataset, bounds): ''' lat_min, lat_max, lon_min, lon_max = dataset.spatial_boundaries() start, end = dataset.temporal_boundaries() - + errors = [] # TODO: THIS IS TERRIBLY inefficent and we need to use a geometry From 94fb8d6bc5f817009074fbc47d1ac44f2492f1be Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 02:17:01 -0700 Subject: [PATCH 11/16] Incorrect indices used for temporal subset after trimming --- ocw/dataset_processor.py | 6 ++--- ocw/utils.py | 58 +++++++++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 22 deletions(-) diff --git a/ocw/dataset_processor.py b/ocw/dataset_processor.py index 22278927..160ffb72 100755 --- a/ocw/dataset_processor.py +++ b/ocw/dataset_processor.py @@ -83,9 +83,9 @@ def temporal_subset(target_dataset, month_start, month_end, logger.warning("Number of times in dataset ({}) does not " "divide evenly into {} year(s). Trimming data..." .format(ntime, nyear)) - s_mon = new_times[0].month - e_mon = new_times[-1].month - new_times = new_times[13-s_mon:-e_mon] + slc = utils.trim_dataset(new_dataset) + new_dataset.values = new_dataset.values[slc] + new_times = new_times[slc] nyear = new_times.size // nmonth averaged_time = [] diff --git a/ocw/utils.py b/ocw/utils.py index 0a798b08..c2b62cfe 100755 --- a/ocw/utils.py +++ b/ocw/utils.py @@ -383,14 +383,34 @@ def get_temporal_overlap(dataset_array): ''' Find the maximum temporal overlap across the observation and model datasets :param dataset_array: an array of OCW datasets + :param idx: start and end indices to denote subset of months used. + :type idx: class:`tuple` ''' - start_time = [] - end_time = [] + start_times = [] + end_times = [] for dataset in dataset_array: - start_time.append(dataset.temporal_boundaries()[0]) - end_time.append(dataset.temporal_boundaries()[1]) + slc = trim_dataset(dataset) + times = dataset.times[slc] + start_time, end_time = times[0], times[-1] + start_times.append(start_time) + end_times.append(end_time) - return np.max(start_time), np.min(end_time) + return np.max(start_times), np.min(end_times) + + +def trim_dataset(dataset): + ''' Trim datasets such that first and last year of data have all 12 months + + :param dataset: Dataset object + :type dataset: :class:`dataset.Dataset + + :returns: Slice index for trimmed dataset + ''' + start_time, end_time = dataset.temporal_boundaries() + start_month = 13 if start_time.month == 1 else start_time.month + end_month = 0 if end_time.month == 12 else end_time.month + slc = slice(13 - start_month, len(dataset.times) - end_month) + return slc def calc_subregion_area_mean_and_std(dataset_array, subregions): @@ -505,33 +525,33 @@ def CORDEX_boundary(domain_name): :param domain_name: CORDEX domain name (http://www.cordex.org/) :type domain_name: :mod:'string' ''' - if domain_name == 'southamerica' or domain_name == 'SAM': + if domain_name == 'southamerica' or domain_name == 'sam': return -57.61, 18.50, 254.28 - 360., 343.02 - 360. - elif domain_name == 'centralamerica' or domain_name == 'CAM': + elif domain_name == 'centralamerica' or domain_name == 'cam': return -19.46, 34.83, 235.74 - 360., 337.78 - 360. 
- elif domain_name == 'northamerica' or domain_name == 'NAM': + elif domain_name == 'northamerica' or domain_name == 'nam': return 12.55, 75.88, 189.26 - 360., 336.74 - 360. - elif domain_name == 'europe' or domain_name == 'EUR': + elif domain_name == 'europe' or domain_name == 'eur': return 22.20, 71.84, 338.23 - 360., 64.4 - elif domain_name == 'africa' or domain_name == 'AFR': + elif domain_name == 'africa' or domain_name == 'afr': return -45.76, 42.24, 335.36 - 360., 60.28 - elif domain_name == 'southasia' or domain_name == 'WAS': + elif domain_name == 'southasia' or domain_name == 'was': return -15.23, 45.07, 19.88, 115.55 - elif domain_name == 'eastasia' or domain_name == 'EAS': + elif domain_name == 'eastasia' or domain_name == 'eas': return -0.10, 61.90, 51.59, 179.99 - elif domain_name == 'centralasia' or domain_name == 'CAS': + elif domain_name == 'centralasia' or domain_name == 'cas': return 18.34, 69.37, 11.05, 139.13 - elif domain_name == 'australasia' or domain_name == 'AUS': + elif domain_name == 'australasia' or domain_name == 'aus': return -52.36, 12.21, 89.25, 179.99 - elif domain_name == 'antartica' or domain_name == 'ANT': + elif domain_name == 'antartica' or domain_name == 'ant': return -89.48, -56.00, -179.00, 179.00 - elif domain_name == 'artic' or domain_name == 'ARC': + elif domain_name == 'artic' or domain_name == 'arc': return 46.06, 89.50, -179.00, 179.00 - elif domain_name == 'mediterranean' or domain_name == 'MED': + elif domain_name == 'mediterranean' or domain_name == 'med': return 25.63, 56.66, 339.79 - 360.00, 50.85 - elif domain_name == 'middleeastnorthafrica' or domain_name == 'MNA': + elif domain_name == 'middleeastnorthafrica' or domain_name == 'mna': return -7.00, 45.00, 333.00 - 360.00, 76.00 - elif domain_name == 'southeastasia' or domain_name == 'SEA': + elif domain_name == 'southeastasia' or domain_name == 'sea': return -15.14, 27.26, 89.26, 146.96 else: err = "Invalid CORDEX domain name" From 032c317be039565645a5c41e39415166720e1109 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 21:59:13 -0700 Subject: [PATCH 12/16] CLIMATE-930 - multiple file loader forces 2D lats and lons --- ocw/data_source/local.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocw/data_source/local.py b/ocw/data_source/local.py index 896032f9..45a57f9b 100644 --- a/ocw/data_source/local.py +++ b/ocw/data_source/local.py @@ -518,11 +518,8 @@ def load_dataset_from_multiple_netcdf_files(variable_name, variable_unit=None, dataset0 = load_file(nc_files[0], variable_name, lat_name=lat_name, lon_name=lon_name, time_name=time_name) - if dataset0.lons.ndim == 1 and dataset0.lats.ndim == 1: - lons, lats = numpy.meshgrid(dataset0.lons, dataset0.lats) - elif dataset0.lons.ndim == 2 and dataset0.lats.ndim == 2: - lons = dataset0.lons - lats = dataset0.lats + lons = dataset0.lons + lats = dataset0.lats if mask_file: mask_dataset = load_file(mask_file, mask_variable) From 77463b9267e20ebf46e625f394eb35d194fd93f0 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 22:28:17 -0700 Subject: [PATCH 13/16] Update local_multiple_files tests for 1D lats and lons --- ocw/tests/test_local.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py index 0cca005e..4ce5a271 100644 --- a/ocw/tests/test_local.py +++ b/ocw/tests/test_local.py @@ -21,7 +21,7 @@ from urllib.request import urlretrieve except ImportError: from urllib import urlretrieve - + import datetime import unittest import os @@ 
-229,12 +229,10 @@ def test_variable_name(self): def test_function_load_dataset_from_multiple_netcdf_files_lats(self): """Test load_multiple_files function for times.""" - _, self.latitudes = np.meshgrid(self.longitudes, self.latitudes) np.testing.assert_array_equal(self.dataset.lats, self.latitudes) def test_function_load_dataset_from_multiple_netcdf_files_lons(self): """Test load_multiple_files function for times.""" - self.longitudes, _ = np.meshgrid(self.longitudes, self.latitudes) np.testing.assert_array_equal(self.dataset.lons, self.longitudes) def test_function_load_dataset_from_multiple_netcdf_files_times(self): @@ -245,12 +243,10 @@ def test_function_load_dataset_from_multiple_netcdf_files_times(self): def test_function_load_dataset_from_multiple_netcdf_files_alt_lats(self): """Test load_multiple_files function for non-default lats.""" - _, self.alt_lats = np.meshgrid(self.alt_lons, self.alt_lats) np.testing.assert_array_equal(self.alt_dataset.lats, self.alt_lats) def test_function_load_dataset_from_multiple_netcdf_files_alt_lons(self): """Test load_multiple_files function for non-default lons.""" - self.alt_lons, _ = np.meshgrid(self.alt_lons, self.alt_lats) np.testing.assert_array_equal(self.alt_dataset.lons, self.alt_lons) def test_function_load_dataset_from_multiple_netcdf_files_alt_times(self): From fb02c74925b977abd2c03f19ea0ccd9b15faec23 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 23:11:51 -0700 Subject: [PATCH 14/16] CLIMATE-927 - RCMES script for running multiple evaluations --- RCMES/CORDEX/cordex.py | 56 ++++++++++++++++ RCMES/CORDEX/metadata_extractor.py | 35 ++++++++-- RCMES/CORDEX/templates/CORDEX.yaml.template | 8 +-- RCMES/metrics_and_plots.py | 2 +- RCMES/run_RCMES.py | 24 ++++--- ocw/plotter.py | 74 ++++++++++----------- 6 files changed, 143 insertions(+), 56 deletions(-) create mode 100644 RCMES/CORDEX/cordex.py diff --git a/RCMES/CORDEX/cordex.py b/RCMES/CORDEX/cordex.py new file mode 100644 index 00000000..dc80be4e --- /dev/null +++ b/RCMES/CORDEX/cordex.py @@ -0,0 +1,56 @@ +import os +import subprocess +import jinja2 +from metadata_extractor import CORDEXMetadataExtractor, obs4MIPSMetadataExtractor + +# These should be modified. TODO: domains can also be made into separate group +# CORDEX domain +domain = 'NAM-44' + +# The output directory +workdir = '/home/goodman/data_processing/CORDEX/analysis' + +# Location of osb4Mips files +obs_dir = '/proj3/data/obs4mips' + +# Location of CORDEX files +models_dir = '/proj3/data/CORDEX/{domain}/*'.format(domain=domain) + +# Extract metadata from model and obs files, pairing up files with the same +# variables for separate evaluations +obs_extractor = obs4MIPSMetadataExtractor(obs_dir) +models_extractor = CORDEXMetadataExtractor(models_dir) +groups = obs_extractor.group(models_extractor, 'variable') + +# Configuration file template, to be rendered repeatedly for each evaluation +# run +env = jinja2.Environment(loader=jinja2.FileSystemLoader('./templates'), + trim_blocks=True, lstrip_blocks=True) +t = env.get_template('CORDEX.yaml.template') + +# Each group represents a single evaluation. Repeat the evaluation for +# three seasons: Summer, Winter, and Annual. 
+seasons = ['annual', 'winter', 'summer']
+errored = []
+for group in groups:
+    obs_info, models_info = group
+    instrument = obs_info['instrument']
+    variable = obs_info['variable']
+    for season in seasons:
+        configfile_basename = '_'.join([domain, instrument, variable, season]) + '.yaml'
+        configfile_path = os.path.join(workdir, domain, instrument,
+                                       variable, season)
+        if not os.path.exists(configfile_path):
+            os.makedirs(configfile_path)
+        configfile_path = os.path.join(configfile_path, configfile_basename)
+        with open(configfile_path, 'w') as configfile:
+            configfile.write(t.render(obs_info=obs_info, models_info=models_info,
+                                      season=season, output_dir=workdir))
+
+        # TODO: Do this in parallel. Will change this once this approach
+        # is well tested.
+        code = subprocess.call(['python', '../run_RCMES.py', configfile_path])
+        if code:
+            errored.append(configfile_path)
+
+print("All runs done. The following ended with an error: {}".format(errored))
diff --git a/RCMES/CORDEX/metadata_extractor.py b/RCMES/CORDEX/metadata_extractor.py
index cb18946b..b97d890a 100644
--- a/RCMES/CORDEX/metadata_extractor.py
+++ b/RCMES/CORDEX/metadata_extractor.py
@@ -84,6 +84,13 @@ def variables(self):
         """
         return self.get_field('variable')
 
+    @property
+    def field_filters(self):
+        """
+        Override this to filter out specific characters contained in a field.
+        """
+        return dict()
+
     def query(self, **kwargs):
         """
         Narrow down the list of files by field names.
@@ -95,7 +102,8 @@ def query(self, **kwargs):
         data = self.data
         for field, value in kwargs.items():
             value = value if isinstance(value, list) else [value]
-            data = [meta for meta in data if meta[field] in value]
+            data = [meta for meta in data
+                    if self._match_filter(meta, field) in value]
         return data
 
     def group(self, extractor, field):
@@ -116,7 +124,7 @@ def group(self, extractor, field):
 
         groups = []
         for meta in results:
-            val = meta[field]
+            val = self._match_filter(meta, field)
             kwargs.update({field: val})
             match = extractor.query(**kwargs)
             groups.append((meta, match))
@@ -151,6 +159,16 @@ def get_pattern(self, fname):
         pattern = '_'.join(base[:len(self.fields)] + ['*.nc'])
         return pattern
 
+    def _match_filter(self, meta, field):
+        """
+        Filter (ignore) certain character patterns when matching a field.
+ """ + val = meta[field] + if field in self.field_filters: + for pattern in self.field_filters[field]: + val = val.replace(pattern, '') + return val + def _extract(self): """ Do the actual metadata extraction from the list of filename given @@ -183,6 +201,13 @@ def fields(self): fields = ['variable', 'instrument', 'processing_level', 'version'] return fields + @property + def field_filters(self): + """ + Field filters for CALIPSO + """ + return dict(variable=['calipso', 'Lidarsr532']) + def filter_filename(self, fname): """ CALIPSO files have odd naming conventions, so we will use @@ -190,8 +215,6 @@ def filter_filename(self, fname): """ fname = os.path.basename(fname) fname = fname.replace('_obs4MIPs_', '_') - fname = fname.replace('calipso', '') - fname = fname.replace('Lidarsr532', '') return fname def get_pattern(self, fname): @@ -202,8 +225,8 @@ def get_pattern(self, fname): offset = -2 if len(base) != 5 else -1 pattern = '_'.join(base[:offset] + ['*.nc']) return pattern - - + + class CORDEXMetadataExtractor(MetadataExtractor): @property def models(self): diff --git a/RCMES/CORDEX/templates/CORDEX.yaml.template b/RCMES/CORDEX/templates/CORDEX.yaml.template index daf6ec52..c0a19943 100644 --- a/RCMES/CORDEX/templates/CORDEX.yaml.template +++ b/RCMES/CORDEX/templates/CORDEX.yaml.template @@ -1,6 +1,6 @@ {% set domain = models_info[0].domain %} {% set instrument = obs_info.instrument %} -{% set variable = obs_info.variable %} +{% set variable = models_info[0].variable %} {% set basename = [variable, instrument, domain, season]|join('_') %} workdir: {{ [output_dir, domain, instrument, variable, season]|join('/') }} output_netcdf_filename: {{ basename }}.nc @@ -23,7 +23,7 @@ time: average_each_year: True space: - boundary_type: {{ domain[:3] }} + boundary_type: CORDEX {{ domain[:3] }} regrid: regrid_on_reference: True @@ -32,12 +32,12 @@ datasets: - loader_name: local_split name: {{ instrument }} file_path: {{ obs_info.filename }} - variable_name: {{ variable }} + variable_name: {{ obs_info.variable }} {% for model_info in models_info %} - loader_name: local_split name: {{ model_info.model }} file_path: {{ model_info.filename }} - variable_name: {{ variable }} + variable_name: {{ model_info.variable }} lat_name: lat lon_name: lon {% endfor %} diff --git a/RCMES/metrics_and_plots.py b/RCMES/metrics_and_plots.py index 78c2f14f..0eb1f022 100644 --- a/RCMES/metrics_and_plots.py +++ b/RCMES/metrics_and_plots.py @@ -59,7 +59,7 @@ def Map_plot_bias_of_multiyear_climatology(obs_dataset, obs_name, model_datasets string_list = list(string.ascii_lowercase) nmodels = len(model_datasets) - row, column = plotter._best_grid_shape((row, column), nmodels + 1) + row, column = plotter._best_grid_shape(nmodels + 1, (row, column)) ax = fig.add_subplot(row,column,1) if map_projection == 'npstere': m = Basemap(ax=ax, projection ='npstere', boundinglat=lat_min, lon_0=0, diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index 76412998..f9ec2c37 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -25,6 +25,11 @@ from glob import glob from getpass import getpass import numpy as np + +# Need these lines to run RCMES through SSH without X11 +import matplotlib +matplotlib.use('Agg') + import ocw.utils as utils import ocw.dataset_processor as dsp from ocw.dataset import Bounds @@ -68,7 +73,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): temporal_resolution = time_info['temporal_resolution'] # Read time info -maximum_overlap_period = space_info.get('maximum_overlap_period', False) 
+maximum_overlap_period = time_info.get('maximum_overlap_period', False) if not maximum_overlap_period: start_time = datetime.strptime(time_info['start_time'].strftime('%Y%m%d'),'%Y%m%d') end_time = datetime.strptime(time_info['end_time'].strftime('%Y%m%d'),'%Y%m%d') @@ -86,7 +91,8 @@ def load_datasets_from_config(extra_opts, *loader_opts): else: domain = space_info['boundary_type'] if domain.startswith('CORDEX '): - domain = domain.replace('CORDEX ', '').lower() + domain = domain.replace('CORDEX', '').lower() + domain = domain.replace(' ', '') min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) # Additional arguments for the DatasetLoader @@ -110,9 +116,9 @@ def load_datasets_from_config(extra_opts, *loader_opts): multiplying_factor[0] = fact names = [dataset.name for dataset in datasets] for i, dataset in enumerate(datasets): - if temporal_resolution == 'daily' or temporal_resolution == 'monthly': - datasets[i] = dsp.normalize_dataset_datetimes(dataset, - temporal_resolution) + res = dataset.temporal_resolution() + if res == 'daily' or res == 'monthly': + datasets[i] = dsp.normalize_dataset_datetimes(dataset, res) if multiplying_factor[i] != 1: datasets[i].values *= multiplying_factor[i] @@ -148,7 +154,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): for i, dataset in enumerate(datasets): datasets[i] = dsp.subset(dataset, bounds) if dataset.temporal_resolution() != temporal_resolution: - datasets[i] = dsp.temporal_rebin(dataset, temporal_resolution) + datasets[i] = dsp.temporal_rebin(datasets[i], temporal_resolution) # Temporally subset both observation and model datasets # for the user specified season @@ -269,8 +275,10 @@ def load_datasets_from_config(extra_opts, *loader_opts): file_name = workdir+plot_info['file_name'] print('metrics {0}/{1}: {2}'.format(imetric, nmetrics, metrics_name)) + default_shape = (int(np.ceil(np.sqrt(ntarget + 2))), + int(np.ceil(np.sqrt(ntarget + 2)))) if metrics_name == 'Map_plot_bias_of_multiyear_climatology': - row, column = plot_info.get('subplots_array', (1, 1)) + row, column = plot_info.get('subplots_array', default_shape) if 'map_projection' in plot_info.keys(): Map_plot_bias_of_multiyear_climatology( reference_dataset, reference_name, target_datasets, target_names, @@ -287,7 +295,7 @@ def load_datasets_from_config(extra_opts, *loader_opts): elif config['use_subregions']: if (metrics_name == 'Timeseries_plot_subregion_interannual_variability' and average_each_year): - row, column = plot_info.get('subplots_array', (1, 1)) + row, column = plot_info.get('subplots_array', default_shape) Time_series_subregion( reference_subregion_mean, reference_name, target_subregion_mean, target_names, False, file_name, row, column, diff --git a/ocw/plotter.py b/ocw/plotter.py index 4896a649..7f9b0920 100755 --- a/ocw/plotter.py +++ b/ocw/plotter.py @@ -80,12 +80,12 @@ def _nice_intervals(data, nlevs): else: mnlvl = mn mxlvl = mx - + # hack to make generated intervals from mpl the same for all versions autolimit_mode = mpl.rcParams.get('axes.autolimit_mode') if autolimit_mode: mpl.rc('axes', autolimit_mode='round_numbers') - + locator = mpl.ticker.MaxNLocator(nlevs) clevs = locator.tick_values(mnlvl, mxlvl) if autolimit_mode: @@ -122,14 +122,14 @@ def _best_grid_shape(nplots, oldshape): # rows and columns for gridshape, automatically # correct it so that it fits only as many plots # as needed - while diff >= ncols: - nrows -= 1 + while diff >= nrows: + ncols -= 1 size = nrows * ncols diff = size - nplots # Don't forget to remove unnecessary 
columns too - if nrows == 1: - ncols = nplots + if ncols == 1: + nrows = nplots newshape = nrows, ncols return newshape @@ -1096,24 +1096,24 @@ def draw_histogram(dataset_array, data_names, fname, fmt='png', nbins=10): def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', colors=False, values=None, region_names=None): - - ''' Fill the States over the contiguous US with colors - - :param regions: The list of subregions(lists of US States) to be filled + + ''' Fill the States over the contiguous US with colors + + :param regions: The list of subregions(lists of US States) to be filled with different colors. - :type regions: :class:`list` + :type regions: :class:`list` :param fname: The filename of the plot. :type fname: :mod:`string` :param fmt: (Optional) filetype for the output. :type fmt: :mod:`string` :param ptitle: (Optional) plot title. :type ptitle: :mod:`string` - :param colors: (Optional) : If True, each region will be filled + :param colors: (Optional) : If True, each region will be filled with different colors without using values - :type colors: :class:`bool` - :param values: (Optional) : If colors==False, color for each region scales - an associated element in values - :type values: :class:`numpy.ndarray` + :type colors: :class:`bool` + :param values: (Optional) : If colors==False, color for each region scales + an associated element in values + :type values: :class:`numpy.ndarray` ''' nregion = len(regions) @@ -1140,7 +1140,7 @@ def fill_US_states_with_color(regions, fname, fmt='png', ptitle='', lons=np.empty(0) for shape in shapes: patches.append(Polygon(np.array(shape), True)) - + lons = np.append(lons, shape[:,0]) lats = np.append(lats, shape[:,1]) if colors: @@ -1164,13 +1164,13 @@ def draw_plot_to_compare_trends(obs_data, ens_data, model_data, fname, fmt='png', ptitle='', data_labels=None, xlabel='', ylabel=''): - ''' Fill the States over the contiguous US with colors - + ''' Fill the States over the contiguous US with colors + :param obs_data: An array of observed trend and standard errors for regions :type obs_data: :class:'numpy.ndarray' - :param ens_data: An array of trend and standard errors from a multi-model ensemble for regions + :param ens_data: An array of trend and standard errors from a multi-model ensemble for regions :type ens_data: : class:'numpy.ndarray' - :param model_data: An array of trends from models for regions + :param model_data: An array of trends from models for regions :type model_data: : class:'numpy.ndarray' :param fname: The filename of the plot. 
:type fname: :mod:`string` @@ -1182,36 +1182,36 @@ def draw_plot_to_compare_trends(obs_data, ens_data, model_data, :type data_labels: :mod:`list` :param xlabel: (Optional) a label for x-axis :type xlabel: :mod:`string` - :param ylabel: (Optional) a label for y-axis + :param ylabel: (Optional) a label for y-axis :type ylabel: :mod:`string` ''' - nregions = obs_data.shape[1] + nregions = obs_data.shape[1] # Set up the figure fig = plt.figure() fig.set_size_inches((8.5, 11.)) fig.dpi = 300 ax = fig.add_subplot(111) - - b_plot = ax.boxplot(model_data, widths=np.repeat(0.2, nregions), positions=np.arange(nregions)+1.3) + + b_plot = ax.boxplot(model_data, widths=np.repeat(0.2, nregions), positions=np.arange(nregions)+1.3) plt.setp(b_plot['medians'], color='black') plt.setp(b_plot['whiskers'], color='black') plt.setp(b_plot['boxes'], color='black') plt.setp(b_plot['fliers'], color='black') - ax.errorbar(np.arange(nregions)+0.8, obs_data[0,:], yerr=obs_data[1,:], - fmt='o', color='r', ecolor='r') - ax.errorbar(np.arange(nregions)+1., ens_data[0,:], yerr=ens_data[1,:], - fmt='o', color='b', ecolor='b') + ax.errorbar(np.arange(nregions)+0.8, obs_data[0,:], yerr=obs_data[1,:], + fmt='o', color='r', ecolor='r') + ax.errorbar(np.arange(nregions)+1., ens_data[0,:], yerr=ens_data[1,:], + fmt='o', color='b', ecolor='b') ax.set_xticks(np.arange(nregions)+1) ax.set_xlim([0, nregions+1]) - + if data_labels: ax.set_xticklabels(data_labels) - fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight') + fig.savefig('%s.%s' % (fname, fmt), bbox_inches='tight') -def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_names, - output_file, title=None, diff_plot=False, cmap = cm.BrBG, - cbar_ticks=[0.01, 0.10, 0.5, 2, 5, 25], +def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_names, + output_file, title=None, diff_plot=False, cmap = cm.BrBG, + cbar_ticks=[0.01, 0.10, 0.5, 2, 5, 25], cbar_label=['0.01', '0.10', '0.5', '2', '5', '25']): ''' :param plot_data: a numpy array of data to plot (dimY, dimX) @@ -1219,11 +1219,11 @@ def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_n :param plot_level: levels to plot :type plot_level: :class:'numpy.ndarray' :param x_ticks: x values where tick makrs are located - :type x_ticks: :class:'numpy.ndarray' + :type x_ticks: :class:'numpy.ndarray' :param x_names: labels for the ticks on x-axis (dimX) :type x_names: :class:'list' :param y_ticks: y values where tick makrs are located - :type y_ticks: :class:'numpy.ndarray' + :type y_ticks: :class:'numpy.ndarray' :param y_names: labels for the ticks on y-axis (dimY) :type y_names: :class:'list' :param output_file: name of output png file @@ -1246,7 +1246,7 @@ def draw_precipitation_JPDF (plot_data, plot_level, x_ticks, x_names,y_ticks,y_n dimY, dimX = plot_data.shape plot_data2 = np.zeros([dimY,dimX]) # sectioned array for plotting - # fontsize + # fontsize rcParams['axes.labelsize'] = 8 rcParams['xtick.labelsize'] = 8 rcParams['ytick.labelsize'] = 8 From 58d6cdf16e52da9c326381718a80ecc33e880eb4 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 28 Sep 2017 23:33:13 -0700 Subject: [PATCH 15/16] Update plotter unit tests for _best_gridshape changes --- ocw/tests/test_plotter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocw/tests/test_plotter.py b/ocw/tests/test_plotter.py index 19ba66a7..894e0c4b 100644 --- a/ocw/tests/test_plotter.py +++ b/ocw/tests/test_plotter.py @@ -51,14 +51,14 @@ class 
TestBestGridShapeFunction(unittest.TestCase): def test_returned_shape_small(self): nplots = 2 oldshape = (2, 2) - expected_shape = (1, 2) + expected_shape = (2, 1) new_shape = plotter._best_grid_shape(nplots, oldshape) self.assertEqual(new_shape, expected_shape) def test_returned_shape_large(self): nplots = 57 - oldshape = (220, 12) - expected_shape = (5, 12) + oldshape = (12, 220) + expected_shape = (12, 5) new_shape = plotter._best_grid_shape(nplots, oldshape) self.assertEqual(new_shape, expected_shape) From e7d99b6ccf00b2f406a3b93217f8512c3dfe2184 Mon Sep 17 00:00:00 2001 From: huikyole Date: Sun, 22 Oct 2017 19:34:40 -0700 Subject: [PATCH 16/16] CLIMATE-932 - A whitespace error in running the CORDEX script - RCMES/run_RCMES.py has been fixed --- RCMES/run_RCMES.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/RCMES/run_RCMES.py b/RCMES/run_RCMES.py index f9ec2c37..3c7291e4 100644 --- a/RCMES/run_RCMES.py +++ b/RCMES/run_RCMES.py @@ -90,11 +90,12 @@ def load_datasets_from_config(extra_opts, *loader_opts): max_lon = space_info['max_lon'] else: domain = space_info['boundary_type'] - if domain.startswith('CORDEX '): + if 'CORDEX' in domain: domain = domain.replace('CORDEX', '').lower() - domain = domain.replace(' ', '') + domain = domain.strip() min_lat, max_lat, min_lon, max_lon = utils.CORDEX_boundary(domain) + # Additional arguments for the DatasetLoader extra_opts = {'min_lat': min_lat, 'max_lat': max_lat, 'min_lon': min_lon, 'max_lon': max_lon, 'start_time': start_time,
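Taken together, the series wires the metadata extractors into a batch evaluation driver. A minimal sketch of the grouping step that cordex.py is built around (the directory paths are the placeholders used in that script, and the import assumes the working directory is RCMES/CORDEX):

    from metadata_extractor import CORDEXMetadataExtractor, obs4MIPSMetadataExtractor

    # Pair each obs4MIPs dataset with the CORDEX model runs that share its
    # variable; each (obs, models) pair drives one evaluation per season.
    obs_extractor = obs4MIPSMetadataExtractor('/proj3/data/obs4mips')
    models_extractor = CORDEXMetadataExtractor('/proj3/data/CORDEX/NAM-44/*')
    for obs_info, models_info in obs_extractor.group(models_extractor, 'variable'):
        print(obs_info['variable'], len(models_info), 'matching model datasets')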