diff --git a/ocw/tests/TestGetNetcdfVariableNames.nc b/ocw/tests/TestGetNetcdfVariableNames.nc new file mode 100644 index 00000000..13e2544d Binary files /dev/null and b/ocw/tests/TestGetNetcdfVariableNames.nc differ diff --git a/ocw/tests/test_local.py b/ocw/tests/test_local.py index 0927f0f9..0cca005e 100644 --- a/ocw/tests/test_local.py +++ b/ocw/tests/test_local.py @@ -14,8 +14,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +"""Tests for local.py, an OCW (netCDF, HDF5) file loading library.""" -# Needed Python 2/3 urllib compatability +# Needed Python 2/3 urllib compatibility try: from urllib.request import urlretrieve except ImportError: @@ -30,125 +31,150 @@ import ocw.data_source.local as local -class test_load_file(unittest.TestCase): +class TestLoadFile(unittest.TestCase): + """Tests for load_file method.""" + + @classmethod + def setUpClass(cls): + """Prepare a netCDF file once to use for all tests.""" + cls.file_path = create_netcdf_file() + + @classmethod + def tearDownClass(cls): + """Remove the no longer needed testing file at the end of the tests.""" + os.remove(cls.file_path) def setUp(self): - # Read netCDF file - self.file_path = create_netcdf_object() - self.netCDF_file = netCDF4.Dataset(self.file_path, 'r') - self.latitudes = self.netCDF_file.variables['latitude'][:] - self.longitudes = self.netCDF_file.variables['longitude'][:] - self.alt_lats = self.netCDF_file.variables['alt_lat'][:] - self.alt_lons = self.netCDF_file.variables['alt_lon'][:] - self.values = self.netCDF_file.variables['value'][:] - self.variable_name_list = ['latitude', - 'longitude', 'time', 'level', 'value'] + """Open and read in attributes of netCDF test file.""" + self.netcdf_file = netCDF4.Dataset(self.file_path, 'r') + self.latitudes = self.netcdf_file.variables['latitude'][:] + self.longitudes = self.netcdf_file.variables['longitude'][:] + self.alt_lats = 
self.netcdf_file.variables['alt_lat'][:] + self.alt_lons = self.netcdf_file.variables['alt_lon'][:] + self.values = self.netcdf_file.variables['value'][:] + self.variable_name_list = ['latitude', 'longitude', 'time', 'level', + 'value'] self.possible_value_name = ['latitude', 'longitude', 'time', 'level'] def tearDown(self): - os.remove(self.file_path) + """Close file object so that it may be re-read in the next test.""" + self.netcdf_file.close() def test_load_invalid_file_path(self): + """Test that load_file raises an exception for an invalid path.""" self.invalid_netcdf_path = '/invalid/path' with self.assertRaises(ValueError): local.load_file(file_path=self.invalid_netcdf_path, variable_name='test variable') def test_function_load_file_lats(self): - """To test load_file function for latitudes""" + """Test load_file function for latitudes.""" np.testing.assert_array_equal(local.load_file( self.file_path, "value").lats, self.latitudes) def test_function_load_file_lons(self): - """To test load_file function for longitudes""" + """Test load_file function for longitudes.""" np.testing.assert_array_equal(local.load_file( self.file_path, "value").lons, self.longitudes) def test_function_load_file_times(self): - """To test load_file function for times""" - newTimes = datetime.datetime(2001, 1, 1), datetime.datetime( + """Test load_file function for times.""" + new_times = datetime.datetime(2001, 1, 1), datetime.datetime( 2001, 2, 1), datetime.datetime(2001, 3, 1) np.testing.assert_array_equal(local.load_file( - self.file_path, "value").times, newTimes) + self.file_path, "value").times, new_times) def test_function_load_file_alt_lats(self): - """To test load_file function for lats with different variable names""" + """Test load_file function for lats with different variable names.""" np.testing.assert_array_equal(local.load_file( self.file_path, "value", lat_name="alt_lat").lats, self.alt_lats) def test_function_load_file_alt_lons(self): - """To test load_file function for 
lons with different variable names""" + """Test load_file function for lons with different variable names.""" np.testing.assert_array_equal(local.load_file( self.file_path, "value", lon_name="alt_lon").lons, self.alt_lons) def test_function_load_file_alt_times(self): - """To test load_file function for times with different variable names""" - newTimes = datetime.datetime(2001, 4, 1), datetime.datetime( + """Test load_file function for times with different variable names.""" + new_times = datetime.datetime(2001, 4, 1), datetime.datetime( 2001, 5, 1), datetime.datetime(2001, 6, 1) np.testing.assert_array_equal(local.load_file( - self.file_path, "value", time_name="alt_time").times, newTimes) + self.file_path, "value", time_name="alt_time").times, new_times) def test_function_load_file_values(self): - """To test load_file function for values""" + """Test load_file function for values.""" new_values = self.values[:, 0, :, :] self.assertTrue(np.allclose(local.load_file( self.file_path, "value").values, new_values)) def test_custom_dataset_name(self): - """Test adding a custom name to a dataset""" - ds = local.load_file(self.file_path, 'value', name='foo') - self.assertEqual(ds.name, 'foo') + """Test adding a custom name to a dataset.""" + dataset = local.load_file(self.file_path, 'value', name='foo') + self.assertEqual(dataset.name, 'foo') def test_dataset_origin(self): - ds = local.load_file(self.file_path, 'value', elevation_index=1) - expected_keys = set(['source', 'path', 'lat_name', 'lon_name', - 'time_name', 'elevation_index']) - self.assertEqual(set(ds.origin.keys()), expected_keys) - self.assertEqual(ds.origin['source'], 'local') + """Test that dataset origin is local.""" + dataset = local.load_file(self.file_path, 'value', elevation_index=1) + expected_keys = {'source', 'path', 'lat_name', 'lon_name', 'time_name', + 'elevation_index'} + self.assertEqual(set(dataset.origin.keys()), expected_keys) + self.assertEqual(dataset.origin['source'], 'local') class 
TestLoadMultipleFiles(unittest.TestCase): + """Tests for the load_multiple_files method.""" + + @classmethod + def setUpClass(cls): + """Prepare a netCDF file once to use for all tests.""" + cls.file_path = create_netcdf_file() + + @classmethod + def tearDownClass(cls): + """Remove the no longer needed testing file at the end of the tests.""" + os.remove(cls.file_path) def setUp(self): - # Read netCDF file - self.file_path = create_netcdf_object() - self.netCDF_file = netCDF4.Dataset(self.file_path, 'r') - self.latitudes = self.netCDF_file.variables['latitude'][:] - self.longitudes = self.netCDF_file.variables['longitude'][:] - self.values = self.netCDF_file.variables['value'][:] + """Open and read in attributes of netCDF test file.""" + self.netcdf_file = netCDF4.Dataset(self.file_path, 'r') + self.latitudes = self.netcdf_file.variables['latitude'][:] + self.longitudes = self.netcdf_file.variables['longitude'][:] + self.values = self.netcdf_file.variables['value'][:] self.variable_name_list = ['latitude', 'longitude', 'time', 'level', 'value'] self.possible_value_name = ['latitude', 'longitude', 'time', 'level'] def tearDown(self): - os.remove(self.file_path) + """Close file object so that it may be re-read in the next test.""" + self.netcdf_file.close() def test_function_load_multiple_files_data_name(self): + """Test load_multiple_files function for dataset name.""" dataset = local.load_multiple_files(self.file_path, "value") self.assertEqual([dataset[0].name], ['']) def test_function_load_multiple_files_lons(self): - """To test load_multiple_file function for longitudes""" + """Test load_multiple_files function for longitudes.""" dataset = local.load_multiple_files(self.file_path, "value") np.testing.assert_array_equal(dataset[0].lons, self.longitudes) def test_function_load_multiple_files_times(self): - """To test load_multiple_files function for times""" + """Test load_multiple_files function for times.""" dataset = local.load_multiple_files(self.file_path, 
"value") - newTimes = datetime.datetime(2001, 1, 1), datetime.datetime( + new_times = datetime.datetime(2001, 1, 1), datetime.datetime( 2001, 2, 1), datetime.datetime(2001, 3, 1) - np.testing.assert_array_equal(dataset[0].times, newTimes) + np.testing.assert_array_equal(dataset[0].times, new_times) def test_function_load_multiple_files_values(self): - """To test load_multiple_files function for values""" + """Test load_multiple_files function for values.""" new_values = self.values[:, 0, :, :] dataset = local.load_multiple_files( self.file_path, "value") self.assertTrue(np.allclose(dataset[0].values, new_values)) def test_load_multiple_files_custom_dataset_name(self): - """Test adding a custom name to a dataset""" + """Test adding a custom name to a dataset.""" dataset = local.load_multiple_files(self.file_path, "value", generic_dataset_name=True, @@ -156,104 +182,122 @@ def test_load_multiple_files_custom_dataset_name(self): self.assertEqual(dataset[0].name, 'foo') def test_dataset_origin(self): + """Test that dataset origin is local.""" dataset = local.load_multiple_files(self.file_path, 'value') - expected_keys = set(['source', 'path', 'lat_name', 'lon_name', - 'time_name']) + expected_keys = {'source', 'path', 'lat_name', 'lon_name', 'time_name'} self.assertEqual(set(dataset[0].origin.keys()), expected_keys) self.assertEqual(dataset[0].origin['source'], 'local') class TestLoadDatasetFromMultipleNetcdfFiles(unittest.TestCase): - - def setUp(self): - self.file_path = create_netcdf_object() - self.netCDF_file = netCDF4.Dataset(self.file_path, 'r+') - self.latitudes = self.netCDF_file.variables['latitude'][:] - self.longitudes = self.netCDF_file.variables['longitude'][:] - self.alt_lats = self.netCDF_file.variables['alt_lat'][:] - self.alt_lons = self.netCDF_file.variables['alt_lon'][:] - self.values = self.netCDF_file.variables['value'][:] - self.variable_name_list = ['latitude', - 'longitude', 'time', 'level', 'value'] - self.possible_value_name = ['latitude', 
'longitude', 'time', 'level'] - self.dataset = local.load_dataset_from_multiple_netcdf_files( + """Tests for load_dataset_from_multiple_netcdf_files method.""" + + @classmethod + def setUpClass(cls): + """Create, read in, and record attributes of a netCDF file for tests.""" + cls.file_path = create_netcdf_file() + cls.netcdf_file = netCDF4.Dataset(cls.file_path, 'r') + cls.latitudes = cls.netcdf_file.variables['latitude'][:] + cls.longitudes = cls.netcdf_file.variables['longitude'][:] + cls.alt_lats = cls.netcdf_file.variables['alt_lat'][:] + cls.alt_lons = cls.netcdf_file.variables['alt_lon'][:] + cls.values = cls.netcdf_file.variables['value'][:] + cls.variable_name_list = ['latitude', 'longitude', 'time', 'level', + 'value'] + cls.possible_value_name = ['latitude', 'longitude', 'time', 'level'] + cls.dataset = local.load_dataset_from_multiple_netcdf_files( variable_name='value', file_path='', - filename_pattern=[ - self.file_path]) - self.alt_dataset = local.load_dataset_from_multiple_netcdf_files( + filename_pattern=[cls.file_path]) + cls.alt_dataset = local.load_dataset_from_multiple_netcdf_files( variable_name='value', lat_name='alt_lat', lon_name='alt_lon', time_name='alt_time', file_path='', - filename_pattern=[ - self.file_path]) + filename_pattern=[cls.file_path]) - def tearDown(self): - os.remove(self.file_path) + @classmethod + def tearDownClass(cls): + """Remove the no longer needed testing file at the end of the tests.""" + cls.netcdf_file.close() + os.remove(cls.file_path) def test_variable_name(self): + """Test that dataset contains a variable value.""" self.assertEqual(self.dataset.variable, 'value') def test_function_load_dataset_from_multiple_netcdf_files_lats(self): - """To test load_multiple_files function for times""" + """Test load_multiple_files function for lats.""" _, self.latitudes = np.meshgrid(self.longitudes, self.latitudes) np.testing.assert_array_equal(self.dataset.lats, self.latitudes) def 
test_function_load_dataset_from_multiple_netcdf_files_lons(self): - """To test load_multiple_files function for times""" + """Test load_multiple_files function for lons.""" self.longitudes, _ = np.meshgrid(self.longitudes, self.latitudes) np.testing.assert_array_equal(self.dataset.lons, self.longitudes) def test_function_load_dataset_from_multiple_netcdf_files_times(self): - """To test load_multiple_files function for times""" - newTimes = datetime.datetime(2001, 1, 1), datetime.datetime( + """Test load_multiple_files function for times.""" + new_times = datetime.datetime(2001, 1, 1), datetime.datetime( 2001, 2, 1), datetime.datetime(2001, 3, 1) - np.testing.assert_array_equal(self.dataset.times, newTimes) + np.testing.assert_array_equal(self.dataset.times, new_times) def test_function_load_dataset_from_multiple_netcdf_files_alt_lats(self): - """To test load_multiple_files function for non-default lats""" + """Test load_multiple_files function for non-default lats.""" _, self.alt_lats = np.meshgrid(self.alt_lons, self.alt_lats) np.testing.assert_array_equal(self.alt_dataset.lats, self.alt_lats) def test_function_load_dataset_from_multiple_netcdf_files_alt_lons(self): - """To test load_multiple_files function for non-default lons""" + """Test load_multiple_files function for non-default lons.""" self.alt_lons, _ = np.meshgrid(self.alt_lons, self.alt_lats) np.testing.assert_array_equal(self.alt_dataset.lons, self.alt_lons) def test_function_load_dataset_from_multiple_netcdf_files_alt_times(self): - """To test load_multiple_files function for non-default times""" - newTimes = datetime.datetime(2001, 4, 1), datetime.datetime( + """Test load_multiple_files function for non-default times.""" + new_times = datetime.datetime(2001, 4, 1), datetime.datetime( 2001, 5, 1), datetime.datetime(2001, 6, 1) - np.testing.assert_array_equal(self.alt_dataset.times, newTimes) + np.testing.assert_array_equal(self.alt_dataset.times, new_times) def 
test_function_load_dataset_from_multiple_netcdf_files_values(self): - """To test load_multiple_files function for values""" + """Test load_multiple_files function for values.""" new_values = self.values[:, 0, :, :] self.assertTrue(np.allclose(self.dataset.values, new_values)) -class test_get_netcdf_variable_names(unittest.TestCase): - file_path = "http://zipper.jpl.nasa.gov/dist/" - test_model = "AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc" +class TestGetNetcdfVariableNames(unittest.TestCase): + """Tests for _get_netcdf_variable_name method retrieving variables. + + TestGetNetcdfVariableNames.nc is a subset of data from + https://zipper.jpl.nasa.gov/dist/AFRICA_KNMI-RACMO2.2b_CTL_ERAINT_MM_50km_1989-2008_tasmax.nc + Test data obtained with: + ncea -d time,0,0 AFRICA_KNMI-[...]_tasmax.nc \ + TestGetNetcdfVariableNames.nc + """ + + @classmethod + def setUpClass(cls): + """Create a netCDF file with invalid dimensions for tests.""" + cls.test_model = "TestGetNetcdfVariableNames.nc" + cls.invalid_netcdf_path = create_invalid_dimensions_netcdf_file() def setUp(self): - urlretrieve(self.file_path + self.test_model, self.test_model) - self.invalid_netcdf_path = create_invalid_dimensions_netcdf_object() + """Open a valid netCDF file for use in the test.""" self.netcdf = netCDF4.Dataset(self.test_model, mode='r') def tearDown(self): - os.remove(self.invalid_netcdf_path) - os.remove(self.test_model) + """Close file object so that it may be re-read in the next test.""" + self.netcdf.close() def test_valid_latitude(self): + """Test that a latitude variable (rlat) can be found in netCDF file.""" self.lat = local._get_netcdf_variable_name(local.LAT_NAMES, self.netcdf, "tasmax") self.assertEquals(self.lat, "rlat") def test_invalid_dimension_latitude(self): + """Test that an invalid latitude variable can be found in file.""" self.netcdf = netCDF4.Dataset(self.invalid_netcdf_path, mode='r') self.lat = local._get_netcdf_variable_name(local.LAT_NAMES, self.netcdf, @@ 
-261,6 +305,7 @@ def test_invalid_dimension_latitude(self): self.assertEquals(self.lat, "latitude") def test_dimension_variable_name_mismatch(self): + """Test that mismatched latitude variables are found as latitude.""" self.netcdf = netCDF4.Dataset(self.invalid_netcdf_path, mode='r') self.lat = local._get_netcdf_variable_name( ["lat_dim"] + local.LAT_NAMES, @@ -269,31 +314,33 @@ def test_dimension_variable_name_mismatch(self): self.assertEquals(self.lat, "latitude") def test_no_match_latitude(self): + """Test that retrieving a nonexistent variable name raises exception.""" with self.assertRaises(ValueError): self.lat = local._get_netcdf_variable_name(['notAVarName'], self.netcdf, "tasmax") -def create_netcdf_object(): +def create_netcdf_file(): + """Create a temporary netCDF file with data used for testing.""" # To create the temporary netCDF file file_path = '/tmp/temporaryNetcdf.nc' - netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4') + netcdf_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4') # To create dimensions - netCDF_file.createDimension('lat_dim', 5) - netCDF_file.createDimension('lon_dim', 5) - netCDF_file.createDimension('time_dim', 3) - netCDF_file.createDimension('level_dim', 2) + netcdf_file.createDimension('lat_dim', 5) + netcdf_file.createDimension('lon_dim', 5) + netcdf_file.createDimension('time_dim', 3) + netcdf_file.createDimension('level_dim', 2) # To create variables - latitudes = netCDF_file.createVariable('latitude', 'd', ('lat_dim',)) - longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',)) - times = netCDF_file.createVariable('time', 'd', ('time_dim',)) + latitudes = netcdf_file.createVariable('latitude', 'd', ('lat_dim',)) + longitudes = netcdf_file.createVariable('longitude', 'd', ('lon_dim',)) + times = netcdf_file.createVariable('time', 'd', ('time_dim',)) # unusual variable names to test optional arguments for Dataset constructor - alt_lats = netCDF_file.createVariable('alt_lat', 'd', 
('lat_dim',)) - alt_lons = netCDF_file.createVariable('alt_lon', 'd', ('lon_dim',)) - alt_times = netCDF_file.createVariable('alt_time', 'd', ('time_dim',)) - levels = netCDF_file.createVariable('level', 'd', ('level_dim',)) - values = netCDF_file.createVariable('value', 'd', + alt_lats = netcdf_file.createVariable('alt_lat', 'd', ('lat_dim',)) + alt_lons = netcdf_file.createVariable('alt_lon', 'd', ('lon_dim',)) + alt_times = netcdf_file.createVariable('alt_time', 'd', ('time_dim',)) + levels = netcdf_file.createVariable('level', 'd', ('level_dim',)) + values = netcdf_file.createVariable('value', 'd', ('time_dim', 'level_dim', 'lat_dim', @@ -323,29 +370,30 @@ def create_netcdf_object(): levels[:] = levels_data values[:] = values_data # Assign time info to time variable - netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' - netCDF_file.variables[ + netcdf_file.variables['time'].units = 'months since 2001-01-01 00:00:00' + netcdf_file.variables[ 'alt_time'].units = 'months since 2001-04-01 00:00:00' - netCDF_file.variables['value'].units = 'foo_units' - netCDF_file.close() + netcdf_file.variables['value'].units = 'foo_units' + netcdf_file.close() return file_path -def create_invalid_dimensions_netcdf_object(): +def create_invalid_dimensions_netcdf_file(): + """Create a temporary netCDF file with invalid dimensions for testing.""" # To create the temporary netCDF file file_path = '/tmp/temporaryNetcdf.nc' - netCDF_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4') + netcdf_file = netCDF4.Dataset(file_path, 'w', format='NETCDF4') # To create dimensions - netCDF_file.createDimension('lat_dim', 5) - netCDF_file.createDimension('lon_dim', 5) - netCDF_file.createDimension('time_dim', 3) - netCDF_file.createDimension('level_dim', 2) + netcdf_file.createDimension('lat_dim', 5) + netcdf_file.createDimension('lon_dim', 5) + netcdf_file.createDimension('time_dim', 3) + netcdf_file.createDimension('level_dim', 2) # To create variables - latitudes = 
netCDF_file.createVariable('latitude', 'd', ('lat_dim',)) - longitudes = netCDF_file.createVariable('longitude', 'd', ('lon_dim',)) - times = netCDF_file.createVariable('time', 'd', ('time_dim',)) - levels = netCDF_file.createVariable('level', 'd', ('level_dim',)) - values = netCDF_file.createVariable('value', + latitudes = netcdf_file.createVariable('latitude', 'd', ('lat_dim',)) + longitudes = netcdf_file.createVariable('longitude', 'd', ('lon_dim',)) + times = netcdf_file.createVariable('time', 'd', ('time_dim',)) + levels = netcdf_file.createVariable('level', 'd', ('level_dim',)) + values = netcdf_file.createVariable('value', 'd', ('level_dim', 'time_dim', @@ -371,8 +419,8 @@ def create_invalid_dimensions_netcdf_object(): levels[:] = flevels values[:] = fvalues # Assign time info to time variable - netCDF_file.variables['time'].units = 'months since 2001-01-01 00:00:00' - netCDF_file.close() + netcdf_file.variables['time'].units = 'months since 2001-01-01 00:00:00' + netcdf_file.close() return file_path diff --git a/test_smoke.py b/test_smoke.py index f08d72a5..269f225c 100644 --- a/test_smoke.py +++ b/test_smoke.py @@ -17,7 +17,7 @@ from pkg_resources import VersionConflict, DistributionNotFound, \ require -from ocw.tests.test_local import create_netcdf_object +from ocw.tests.test_local import create_netcdf_file from ocw.data_source import local from ocw import dataset_processor as dsp import os @@ -73,7 +73,7 @@ def check_dataset_loading(): ''' Try loading test dataset ''' dataset = None try: - file_path = create_netcdf_object() + file_path = create_netcdf_file() dataset = local.load_file(file_path, variable_name='value') except Exception as e: fail("\nDataset loading")