From 11780aa47c1717bf49d3384422850108029c6549 Mon Sep 17 00:00:00 2001 From: dagibbs22 Date: Tue, 22 Jun 2021 10:35:32 -0400 Subject: [PATCH] Changed entire model (main model and pre-processing) to compress using DEFLATE instead of LZW. DEFLATE-compressed files are substantially smaller than LZW-compressed files, and don't seem to take longer to create or need more memory. In fact, tests of full sets of tiles using DEFLATE instead of COMPRESS had the same peak memory for both and DEFLATE ran very slightly slower, but not so much so that it's worth using larger files (which obviously upload and download from s3 much faster). Testing is here: https://3.basecamp.com/3656819/buckets/7989024/todos/3884849933 (#20) I ran the model locally in Docker on 00N_110E and everything ran fine. Not running on s3 instance because this shouldn't affect anything on s3. One thing to note, though, is that since the files are smaller now, I probably don't have to delete as many intermediate tiles during the model run to have enough space on the ec2 instance. I don't know exactly how many more intermediate tiles I can keep on the instance, but I hope I can at least keep the intermediate tiles that I'll need to use later in the model (like mangrove biomass). Also, I haven't tested this for the pre-processeing steps, like mp_prep_other_inputs.py. I assume this new compression will work fine for those; the pre-processing steps are more likely not to work because of other changes I've made, actually. --- analyses/aggregate_results_to_4_km.py | 6 ++--- analyses/create_supplementary_outputs.py | 2 +- analyses/mp_aggregate_results_to_4_km.py | 4 ++-- analyses/net_flux.py | 2 +- analyses/tile_statistics.py | 2 +- burn_date/clip_year_tiles.py | 4 ++-- burn_date/hansen_burnyear_final.py | 2 +- carbon_pools/create_carbon_pools.py | 16 +++++++------- carbon_pools/create_inputs_for_C_pools.py | 2 +- carbon_pools/create_soil_C.py | 2 +- carbon_pools/mp_create_soil_C.py | 2 +- data_prep/model_extent.py | 2 +- data_prep/plantation_preparation.py | 18 +++++++-------- data_prep/prep_other_inputs.py | 6 ++--- ...c_gross_emissions_convert_to_grassland.cpp | 2 +- .../cpp_util/calc_gross_emissions_generic.cpp | 2 +- .../calc_gross_emissions_no_shifting_ag.cpp | 2 +- .../calc_gross_emissions_soil_only.cpp | 2 +- emissions/mp_peatland_processing.py | 2 +- emissions/peatland_processing.py | 6 ++--- gain/US_removal_rates.py | 2 +- ...nual_gain_rate_AGC_BGC_all_forest_types.py | 2 +- gain/annual_gain_rate_IPCC_defaults.py | 2 +- gain/annual_gain_rate_mangrove.py | 2 +- gain/continent_ecozone_tiles.py | 2 +- gain/forest_age_category_IPCC.py | 2 +- gain/gain_year_count_all_forest_types.py | 22 +++++++++---------- gain/gross_removals_all_forest_types.py | 2 +- run_full_model.py | 22 +++++++------------ sensitivity_analysis/Mekong_loss.py | 2 +- sensitivity_analysis/US_removal_rates.py | 2 +- sensitivity_analysis/legal_AMZ_loss.py | 10 ++++----- sensitivity_analysis/mp_Mekong_loss.py | 2 +- sensitivity_analysis/mp_legal_AMZ_loss.py | 4 ++-- universal_util.py | 14 ++++++------ 35 files changed, 86 insertions(+), 92 deletions(-) diff --git a/analyses/aggregate_results_to_4_km.py b/analyses/aggregate_results_to_4_km.py index 52b4568c..71a3892e 100644 --- a/analyses/aggregate_results_to_4_km.py +++ b/analyses/aggregate_results_to_4_km.py @@ -136,7 +136,7 @@ def aggregate(tile, thresh, sensit_type, no_upload): # from the 2D array created by rasterio above # https://gis.stackexchange.com/questions/279953/numpy-array-to-gtiff-using-rasterio-without-source-raster with rasterio.open(out_raster, 'w', - driver='GTiff', compress='LZW', nodata='0', dtype='float32', count=1, + driver='GTiff', compress='DEFLATE', nodata='0', dtype='float32', count=1, height=250, width=250, crs='EPSG:4326', transform=from_origin(xmin,ymax,0.04,0.04)) as aggregated: aggregated.write(sum_array, 1) @@ -208,9 +208,9 @@ def percent_diff(std_aggreg_flux, sensit_aggreg_flux, sensit_type, no_upload): perc_diff_outfilename = '{0}_{1}_{2}.tif'.format(cn.pattern_aggreg_sensit_perc_diff, sensit_type, date_formatted) perc_diff_outfilearg = '--outfile={}'.format(perc_diff_outfilename) # cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg, - # '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--quiet'] + # '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] cmd = ['gdal_calc.py', '-A', sensit_aggreg_flux, '-B', std_aggreg_flux, perc_diff_calc, perc_diff_outfilearg, - '--overwrite', '--co', 'COMPRESS=LZW', '--quiet'] + '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed diff --git a/analyses/create_supplementary_outputs.py b/analyses/create_supplementary_outputs.py index 5e21c769..80983fca 100644 --- a/analyses/create_supplementary_outputs.py +++ b/analyses/create_supplementary_outputs.py @@ -68,7 +68,7 @@ def create_supplementary_outputs(tile_id, input_pattern, output_patterns, sensit kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0, dtype='float32' ) diff --git a/analyses/mp_aggregate_results_to_4_km.py b/analyses/mp_aggregate_results_to_4_km.py index fe9b65e6..1a41607b 100644 --- a/analyses/mp_aggregate_results_to_4_km.py +++ b/analyses/mp_aggregate_results_to_4_km.py @@ -176,8 +176,8 @@ def mp_aggregate_results_to_4_km(sensit_type, thresh, tile_id_list, std_net_flux out_pattern = uu.name_aggregated_output(download_pattern_name, thresh, sensit_type) uu.print_log(out_pattern) - # Produces a single raster of all the 10x10 tiles (0.4 degree resolution) - cmd = ['gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0', '-co', 'COMPRESS=LZW', + # Produces a single raster of all the 10x10 tiles (0.04 degree resolution) + cmd = ['gdalwarp', '-t_srs', "EPSG:4326", '-overwrite', '-dstnodata', '0', '-co', 'COMPRESS=DEFLATE', '-tr', '0.04', '0.04', out_vrt, '{}.tif'.format(out_pattern)] uu.log_subprocess_output_full(cmd) diff --git a/analyses/net_flux.py b/analyses/net_flux.py index 7e16c2be..e3c3e393 100644 --- a/analyses/net_flux.py +++ b/analyses/net_flux.py @@ -51,7 +51,7 @@ def net_calc(tile_id, pattern, sensit_type, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0, dtype='float32' ) diff --git a/analyses/tile_statistics.py b/analyses/tile_statistics.py index 95b6e8d5..54bcd12d 100644 --- a/analyses/tile_statistics.py +++ b/analyses/tile_statistics.py @@ -50,7 +50,7 @@ def create_tile_statistics(tile, sensit_type, tile_stats_txt): out = '--outfile={}'.format(outname) uu.print_log("Converting {} from /ha to /pixel...".format(tile)) - cmd = ['gdal_calc.py', '-A', tile, '-B', area_tile, calc, out, '--NoDataValue=0', '--co', 'COMPRESS=LZW', + cmd = ['gdal_calc.py', '-A', tile, '-B', area_tile, calc, out, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--overwrite', '--quiet'] uu.log_subprocess_output_full(cmd) diff --git a/burn_date/clip_year_tiles.py b/burn_date/clip_year_tiles.py index 17ecae64..651561af 100644 --- a/burn_date/clip_year_tiles.py +++ b/burn_date/clip_year_tiles.py @@ -29,7 +29,7 @@ def clip_year_tiles(tile_year_list, no_upload): uu.print_log("Clipping burn year vrt to {0} for {1}".format(tile_id, year)) clipped_raster = "ba_clipped_{0}_{1}.tif".format(year, tile_id) - cmd = ['gdal_translate', '-ot', 'Byte', '-co', 'COMPRESS=LZW', '-a_nodata', '0'] + cmd = ['gdal_translate', '-ot', 'Byte', '-co', 'COMPRESS=DEFLATE', '-a_nodata', '0'] cmd += [vrt_name, clipped_raster, '-tr', '.00025', '.00025'] cmd += ['-projwin', str(xmin), str(ymax), str(xmax), str(ymin)] uu.log_subprocess_output_full(cmd) @@ -39,7 +39,7 @@ def clip_year_tiles(tile_year_list, no_upload): recoded_output = "ba_{0}_{1}.tif".format(year, tile_id) outfile = '--outfile={}'.format(recoded_output) - cmd = ['gdal_calc.py', '-A', clipped_raster, calc, outfile, '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--quiet'] + cmd = ['gdal_calc.py', '-A', clipped_raster, calc, outfile, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--quiet'] uu.log_subprocess_output_full(cmd) # Only copies to s3 if the tile has data. diff --git a/burn_date/hansen_burnyear_final.py b/burn_date/hansen_burnyear_final.py index 1c796537..77383987 100644 --- a/burn_date/hansen_burnyear_final.py +++ b/burn_date/hansen_burnyear_final.py @@ -134,7 +134,7 @@ def hansen_burnyear(tile_id, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/carbon_pools/create_carbon_pools.py b/carbon_pools/create_carbon_pools.py index a2448e9f..2ffd6f54 100644 --- a/carbon_pools/create_carbon_pools.py +++ b/carbon_pools/create_carbon_pools.py @@ -124,7 +124,7 @@ def create_AGC(tile_id, sensit_type, carbon_pool_extent, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0, dtype='float32' ) @@ -271,7 +271,7 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) AGC_2000_src = rasterio.open(AGC_2000) kwargs = AGC_2000_src.meta - kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0) + kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_BGC_2000] if sensit_type != 'std': @@ -292,7 +292,7 @@ def create_BGC(tile_id, mang_BGB_AGB_ratio, carbon_pool_extent, sensit_type, no_ AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) AGC_emis_year_src = rasterio.open(AGC_emis_year) kwargs = AGC_emis_year_src.meta - kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0) + kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_BGC_emis_year] if sensit_type != 'std': @@ -398,7 +398,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat AGC_2000 = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_2000) AGC_2000_src = rasterio.open(AGC_2000) kwargs = AGC_2000_src.meta - kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0) + kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_deadwood_2000, cn.pattern_litter_2000] if sensit_type != 'std': @@ -429,7 +429,7 @@ def create_deadwood_litter(tile_id, mang_deadwood_AGB_ratio, mang_litter_AGB_rat AGC_emis_year = uu.sensit_tile_rename(sensit_type, tile_id, cn.pattern_AGC_emis_year) AGC_emis_year_src = rasterio.open(AGC_emis_year) kwargs = AGC_emis_year_src.meta - kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0) + kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_deadwood_emis_year_2000, cn.pattern_litter_emis_year_2000] @@ -713,7 +713,7 @@ def create_soil_emis_extent(tile_id, pattern, sensit_type, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0, dtype='uint16' ) @@ -780,7 +780,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): uu.print_log(" No soil C 2000 tile found for", tile_id) kwargs = AGC_2000_src.meta - kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0) + kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_2000_src.block_windows(1) output_pattern_list = [cn.pattern_total_C_2000] if sensit_type != 'std': @@ -814,7 +814,7 @@ def create_total_C(tile_id, carbon_pool_extent, sensit_type, no_upload): uu.print_log(" No soil C emission year tile found for", tile_id) kwargs = AGC_emis_year_src.meta - kwargs.update(driver='GTiff', count=1, compress='lzw', nodata=0) + kwargs.update(driver='GTiff', count=1, compress='DEFLATE', nodata=0) windows = AGC_emis_year_src.block_windows(1) output_pattern_list = [cn.pattern_total_C_emis_year] if sensit_type != 'std': diff --git a/carbon_pools/create_inputs_for_C_pools.py b/carbon_pools/create_inputs_for_C_pools.py index 009edea4..d7192399 100644 --- a/carbon_pools/create_inputs_for_C_pools.py +++ b/carbon_pools/create_inputs_for_C_pools.py @@ -52,7 +52,7 @@ def create_input_files(tile_id, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/carbon_pools/create_soil_C.py b/carbon_pools/create_soil_C.py index cb574f4c..e01d8ad0 100644 --- a/carbon_pools/create_soil_C.py +++ b/carbon_pools/create_soil_C.py @@ -91,7 +91,7 @@ def create_combined_soil_C(tile_id, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/carbon_pools/mp_create_soil_C.py b/carbon_pools/mp_create_soil_C.py index 8489a7b6..30773b52 100644 --- a/carbon_pools/mp_create_soil_C.py +++ b/carbon_pools/mp_create_soil_C.py @@ -220,7 +220,7 @@ def mp_create_soil_C(tile_id_list, no_upload=None): calc = '--calc=(A-B)/3' out_filearg = '--outfile={}'.format(soil_C_stdev_global) cmd = ['gdal_calc.py', '-A', vrt_CI95, '-B', vrt_CI05, calc, out_filearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type=Float32'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type=Float32'] uu.log_subprocess_output_full(cmd) uu.print_log("{} created.".format(soil_C_stdev_global)) diff --git a/data_prep/model_extent.py b/data_prep/model_extent.py index bc2d615d..a50c4c55 100644 --- a/data_prep/model_extent.py +++ b/data_prep/model_extent.py @@ -55,7 +55,7 @@ def model_extent(tile_id, pattern, sensit_type, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/data_prep/plantation_preparation.py b/data_prep/plantation_preparation.py index 351d942d..f5548f1e 100644 --- a/data_prep/plantation_preparation.py +++ b/data_prep/plantation_preparation.py @@ -35,7 +35,7 @@ def rasterize_gadm_1x1(tile_id): tile_1x1 = 'GADM_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1) uu.print_log("Rasterizing", tile_1x1) cmd = ['gdal_rasterize', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)), - '-co', 'COMPRESS=LZW', '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), + '-co', 'COMPRESS=DEFLATE', '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-burn', '1', '-a_nodata', '0', cn.gadm_iso, tile_1x1] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -101,7 +101,7 @@ def create_1x1_plantation_from_1x1_gadm(tile_1x1): # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector # For plantation gain rate - cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_gain_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'growth', '-a_nodata', '0'] + cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=DEFLATE', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_gain_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'growth', '-a_nodata', '0'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -109,7 +109,7 @@ def create_1x1_plantation_from_1x1_gadm(tile_1x1): # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector # For plantation type - cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'type_reclass', '-a_nodata', '0'] + cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=DEFLATE', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'type_reclass', '-a_nodata', '0'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -139,7 +139,7 @@ def create_1x1_plantation_growth_from_1x1_planted(tile_1x1): uu.print_log("There are plantations in {}. Converting to raster...".format(tile_1x1)) # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector - cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', + cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=DEFLATE', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_gain_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'growth', '-a_nodata', '0'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging @@ -167,7 +167,7 @@ def create_1x1_plantation_type_from_1x1_planted(tile_1x1): uu.print_log("There are plantations in {}. Converting to raster...".format(tile_1x1)) # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector - cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', + cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=DEFLATE', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_type_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'type_reclass', '-a_nodata', '0', '-ot', 'Byte'] @@ -193,7 +193,7 @@ def create_1x1_plantation_stdev_from_1x1_planted(tile_1x1): print("There are plantations in {}. Converting to stdev raster...".format(tile_1x1)) # https://gis.stackexchange.com/questions/187224/how-to-use-gdal-rasterize-with-postgis-vector - cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=LZW', 'PG:dbname=ubuntu', + cmd = ['gdal_rasterize', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-co', 'COMPRESS=DEFLATE', 'PG:dbname=ubuntu', '-l', 'all_plant', 'plant_stdev_{0}_{1}.tif'.format(ymax_1x1, xmin_1x1), '-te', str(xmin_1x1), str(ymin_1x1), str(xmax_1x1), str(ymax_1x1), '-a', 'SD_error', '-a_nodata', '0'] @@ -211,7 +211,7 @@ def create_10x10_plantation_gain(tile_id, plant_gain_1x1_vrt): tile_10x10 = '{0}_{1}.tif'.format(tile_id, cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked) uu.print_log("Rasterizing", tile_10x10) cmd = ['gdalwarp', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)), - '-co', 'COMPRESS=LZW', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), + '-co', 'COMPRESS=DEFLATE', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', '-ot', 'Float32', plant_gain_1x1_vrt, tile_10x10] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -242,7 +242,7 @@ def create_10x10_plantation_type(tile_id, plant_type_1x1_vrt): tile_10x10 = '{0}_{1}.tif'.format(tile_id, cn.pattern_planted_forest_type_unmasked) uu.print_log("Rasterizing", tile_10x10) cmd = ['gdalwarp', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)), - '-co', 'COMPRESS=LZW', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), + '-co', 'COMPRESS=DEFLATE', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', '-ot', 'Byte', plant_type_1x1_vrt, tile_10x10] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) @@ -273,7 +273,7 @@ def create_10x10_plantation_gain_stdev(tile_id, plant_stdev_1x1_vrt): tile_10x10 = '{0}_{1}.tif'.format(tile_id, cn.pattern_planted_forest_stdev_unmasked) print("Rasterizing", tile_10x10) cmd = ['gdalwarp', '-tr', '{}'.format(str(cn.Hansen_res)), '{}'.format(str(cn.Hansen_res)), - '-co', 'COMPRESS=LZW', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), + '-co', 'COMPRESS=DEFLATE', '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-t_srs', 'EPSG:4326', '-overwrite', '-ot', 'Float32', plant_stdev_1x1_vrt, tile_10x10] subprocess.check_call(cmd) diff --git a/data_prep/prep_other_inputs.py b/data_prep/prep_other_inputs.py index 3f054fd2..1df0bef0 100644 --- a/data_prep/prep_other_inputs.py +++ b/data_prep/prep_other_inputs.py @@ -26,7 +26,7 @@ def rasterize_pre_2000_plantations(tile_id): out_tile = '{0}_{1}.tif'.format(tile_id, cn.pattern_plant_pre_2000) - cmd= ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), + cmd= ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=DEFLATE', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-ot', 'Byte', '-a_nodata', '0', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '{}.shp'.format(cn.pattern_plant_pre_2000_raw), out_tile] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging @@ -51,7 +51,7 @@ def create_climate_zone_tiles(tile_id): # Rather than the usual 40000x1 windows, this creates 1024x1024 windows for filling in missing values (see below). # The output of gdalwarp ("climate_zone_intermediate") is not used anywhere else. uu.print_log("Warping climate zone tile", tile_id) - cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), + cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-ot', 'Byte', '-overwrite', '-co', 'TILED=YES', '-co', 'BLOCKXSIZE=1024', '-co', 'BLOCKYSIZE=1024', @@ -82,7 +82,7 @@ def create_climate_zone_tiles(tile_id): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.cpp b/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.cpp index 136367b7..13f85f07 100644 --- a/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.cpp +++ b/emissions/cpp_util/calc_gross_emissions_convert_to_grassland.cpp @@ -223,7 +223,7 @@ GDALRasterBand *OUTBAND20; OGRSpatialReference oSRS; char *OUTPRJ = NULL; char **papszOptions = NULL; -papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "LZW" ); +papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "DEFLATE" ); OUTDRIVER = GetGDALDriverManager()->GetDriverByName("GTIFF"); if( OUTDRIVER == NULL ) {cout << "no driver" << endl; exit( 1 );}; oSRS.SetWellKnownGeogCS( "WGS84" ); diff --git a/emissions/cpp_util/calc_gross_emissions_generic.cpp b/emissions/cpp_util/calc_gross_emissions_generic.cpp index 212e4761..eae2a292 100644 --- a/emissions/cpp_util/calc_gross_emissions_generic.cpp +++ b/emissions/cpp_util/calc_gross_emissions_generic.cpp @@ -277,7 +277,7 @@ GDALRasterBand *OUTBAND20; OGRSpatialReference oSRS; char *OUTPRJ = NULL; char **papszOptions = NULL; -papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "LZW" ); +papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "DEFLATE" ); OUTDRIVER = GetGDALDriverManager()->GetDriverByName("GTIFF"); if( OUTDRIVER == NULL ) {cout << "no driver" << endl; exit( 1 );}; oSRS.SetWellKnownGeogCS( "WGS84" ); diff --git a/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.cpp b/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.cpp index 545919bf..ab718fb7 100644 --- a/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.cpp +++ b/emissions/cpp_util/calc_gross_emissions_no_shifting_ag.cpp @@ -223,7 +223,7 @@ GDALRasterBand *OUTBAND20; OGRSpatialReference oSRS; char *OUTPRJ = NULL; char **papszOptions = NULL; -papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "LZW" ); +papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "DEFLATE" ); OUTDRIVER = GetGDALDriverManager()->GetDriverByName("GTIFF"); if( OUTDRIVER == NULL ) {cout << "no driver" << endl; exit( 1 );}; oSRS.SetWellKnownGeogCS( "WGS84" ); diff --git a/emissions/cpp_util/calc_gross_emissions_soil_only.cpp b/emissions/cpp_util/calc_gross_emissions_soil_only.cpp index 695c3976..52476c72 100644 --- a/emissions/cpp_util/calc_gross_emissions_soil_only.cpp +++ b/emissions/cpp_util/calc_gross_emissions_soil_only.cpp @@ -254,7 +254,7 @@ GDALRasterBand *OUTBAND20; OGRSpatialReference oSRS; char *OUTPRJ = NULL; char **papszOptions = NULL; -papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "LZW" ); +papszOptions = CSLSetNameValue( papszOptions, "COMPRESS", "DEFLATE" ); OUTDRIVER = GetGDALDriverManager()->GetDriverByName("GTIFF"); if( OUTDRIVER == NULL ) {cout << "no driver" << endl; exit( 1 );}; oSRS.SetWellKnownGeogCS( "WGS84" ); diff --git a/emissions/mp_peatland_processing.py b/emissions/mp_peatland_processing.py index 7552a5ef..84bcda9d 100644 --- a/emissions/mp_peatland_processing.py +++ b/emissions/mp_peatland_processing.py @@ -66,7 +66,7 @@ def mp_peatland_processing(tile_id_list, run_date = None): # Converts the Jukka peat shapefile to a raster uu.print_log('Rasterizing jukka peat...') - cmd= ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=LZW', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), + cmd= ['gdal_rasterize', '-burn', '1', '-co', 'COMPRESS=DEFLATE', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-ot', 'Byte', '-a_nodata', '0', cn.jukka_peat_shp, jukka_tif] uu.log_subprocess_output_full(cmd) uu.print_log(' Jukka peat rasterized') diff --git a/emissions/peatland_processing.py b/emissions/peatland_processing.py index c8b7e355..9e9a6499 100644 --- a/emissions/peatland_processing.py +++ b/emissions/peatland_processing.py @@ -43,7 +43,7 @@ def create_peat_mask_tiles(tile_id): calc = '--calc=(A==14)' peat_mask_out_filearg = '--outfile={}'.format(out_tile_no_tag) cmd = ['gdal_calc.py', '-A', out_intermediate, calc, peat_mask_out_filearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type=Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type=Byte', '--quiet'] uu.log_subprocess_output_full(cmd) uu.print_log("{} created.".format(tile_id)) @@ -57,7 +57,7 @@ def create_peat_mask_tiles(tile_id): uu.print_log("{} is inside CIFOR band. Using CIFOR/Jukka combination...".format(tile_id)) # Combines CIFOR and Jukka (if it occurs there) - cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), + cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', '{}'.format(cn.Hansen_res), '{}'.format(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-overwrite', '{}'.format(cn.cifor_peat_file), 'jukka_peat.tif', out_tile_no_tag] uu.log_subprocess_output_full(cmd) @@ -91,7 +91,7 @@ def create_peat_mask_tiles(tile_id): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/gain/US_removal_rates.py b/gain/US_removal_rates.py index 85c5dd67..656cab93 100644 --- a/gain/US_removal_rates.py +++ b/gain/US_removal_rates.py @@ -41,7 +41,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g kwargs.update( driver='GTiff', count=1, - compress='LZW', + compress='DEFLATE', nodata=0, dtype='float32' ) diff --git a/gain/annual_gain_rate_AGC_BGC_all_forest_types.py b/gain/annual_gain_rate_AGC_BGC_all_forest_types.py index a285bded..11ceb04e 100644 --- a/gain/annual_gain_rate_AGC_BGC_all_forest_types.py +++ b/gain/annual_gain_rate_AGC_BGC_all_forest_types.py @@ -55,7 +55,7 @@ def annual_gain_rate_AGC_BGC_all_forest_types(tile_id, output_pattern_list, sens kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/gain/annual_gain_rate_IPCC_defaults.py b/gain/annual_gain_rate_IPCC_defaults.py index 21499c39..eceaced6 100644 --- a/gain/annual_gain_rate_IPCC_defaults.py +++ b/gain/annual_gain_rate_IPCC_defaults.py @@ -59,7 +59,7 @@ def annual_gain_rate(tile_id, sensit_type, gain_table_dict, stdev_table_dict, ou kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0, dtype='float32' ) diff --git a/gain/annual_gain_rate_mangrove.py b/gain/annual_gain_rate_mangrove.py index 76961367..ada1ab0a 100644 --- a/gain/annual_gain_rate_mangrove.py +++ b/gain/annual_gain_rate_mangrove.py @@ -53,7 +53,7 @@ def annual_gain_rate(tile_id, sensit_type, output_pattern_list, gain_above_dict, kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0, dtype='float32' ) diff --git a/gain/continent_ecozone_tiles.py b/gain/continent_ecozone_tiles.py index c95a56b7..c896ccd0 100644 --- a/gain/continent_ecozone_tiles.py +++ b/gain/continent_ecozone_tiles.py @@ -62,7 +62,7 @@ def create_continent_ecozone_tiles(tile_id): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/gain/forest_age_category_IPCC.py b/gain/forest_age_category_IPCC.py index b95f967e..f632cfc4 100644 --- a/gain/forest_age_category_IPCC.py +++ b/gain/forest_age_category_IPCC.py @@ -97,7 +97,7 @@ def forest_age_category(tile_id, gain_table_dict, pattern, sensit_type, no_uploa kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/gain/gain_year_count_all_forest_types.py b/gain/gain_year_count_all_forest_types.py index 5a16c7e5..1f11f64e 100644 --- a/gain/gain_year_count_all_forest_types.py +++ b/gain/gain_year_count_all_forest_types.py @@ -39,7 +39,7 @@ def create_gain_year_count_loss_only(tile_id, sensit_type, no_upload): loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) loss_outfilearg = '--outfile={}'.format(loss_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_calc, loss_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: uu.print_log("No loss tile found for {}. Skipping loss only pixel gain year count.".format(tile_id)) @@ -65,7 +65,7 @@ def create_gain_year_count_gain_only_standard(tile_id, sensit_type, no_upload): gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) gain_outfilearg = '--outfile={}'.format(gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, gain_calc, gain_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: uu.print_log("No loss tile found for {}. Not using it for gain only pixel gain year count.".format(tile_id)) @@ -73,7 +73,7 @@ def create_gain_year_count_gain_only_standard(tile_id, sensit_type, no_upload): gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) gain_outfilearg = '--outfile={}'.format(gain_outfilename) cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, gain_calc, gain_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed @@ -97,7 +97,7 @@ def create_gain_year_count_gain_only_maxgain(tile_id, sensit_type, no_upload): gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) gain_outfilearg = '--outfile={}'.format(gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, gain_calc, gain_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: uu.print_log("No loss tile found for {}. Not using loss for gain only pixel gain year count.".format(tile_id)) @@ -105,7 +105,7 @@ def create_gain_year_count_gain_only_maxgain(tile_id, sensit_type, no_upload): gain_outfilename = '{}_growth_years_gain_only.tif'.format(tile_id) gain_outfilearg = '--outfile={}'.format(gain_outfilename) cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, gain_calc, gain_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed @@ -130,7 +130,7 @@ def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload): no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, no_change_calc, - no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: uu.print_log("No loss tile found for {}. Not using it for no change pixel gain year count.".format(tile_id)) @@ -138,7 +138,7 @@ def create_gain_year_count_no_change_standard(tile_id, sensit_type, no_upload): no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) cmd = ['gdal_calc.py', '-A', gain, '-B', model_extent, no_change_calc, - no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) # Prints information about the tile that was just processed @@ -167,7 +167,7 @@ def create_gain_year_count_no_change_legal_Amazon_loss(tile_id, sensit_type, no_ no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', model_extent, no_change_calc, - no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) os.remove(loss_vrt) @@ -193,7 +193,7 @@ def create_gain_year_count_loss_and_gain_standard(tile_id, sensit_type, no_uploa loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_and_gain_calc, - loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: uu.print_log("No loss tile found for {}. Skipping loss and gain pixel gain year count.".format(tile_id)) @@ -220,7 +220,7 @@ def create_gain_year_count_loss_and_gain_maxgain(tile_id, sensit_type, no_upload loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', model_extent, loss_and_gain_calc, - loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] uu.log_subprocess_output_full(cmd) else: uu.print_log("No loss tile found for {}. Skipping loss and gain pixel gain year count.".format(tile_id)) @@ -259,7 +259,7 @@ def create_gain_year_count_merge(tile_id, pattern, sensit_type, no_upload): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/gain/gross_removals_all_forest_types.py b/gain/gross_removals_all_forest_types.py index 8c76777c..4a3fecb9 100644 --- a/gain/gross_removals_all_forest_types.py +++ b/gain/gross_removals_all_forest_types.py @@ -55,7 +55,7 @@ def gross_removals_all_forest_types(tile_id, output_pattern_list, sensit_type, n kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/run_full_model.py b/run_full_model.py index 9998c475..90e32a35 100644 --- a/run_full_model.py +++ b/run_full_model.py @@ -352,7 +352,7 @@ def main (): uu.print_log(":::::Freeing up memory for gain year count creation by deleting unneeded tiles") tiles_to_delete = [] - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) @@ -363,9 +363,9 @@ def main (): tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_all_types))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) @@ -415,12 +415,6 @@ def main (): uu.print_log(":::::Freeing up memory for carbon pool creation by deleting unneeded tiles") tiles_to_delete = [] tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_model_extent))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_mangrove))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_Europe))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_planted_forest_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_BGC_natrl_forest_US))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGC_natrl_forest_young))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_age_cat_IPCC))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_AGB_IPCC_defaults))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_annual_gain_BGB_IPCC_defaults))) @@ -429,9 +423,9 @@ def main (): tiles_to_delete.extend(glob.glob('*growth_years*tif')) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_gain_year_count))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_BGCO2_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_BGCO2_all_types))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_ifl_primary))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_planted_forest_type_unmasked))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_plant_pre_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGB_mangrove))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_stdev_annual_gain_AGC_BGC_natrl_forest_Europe))) @@ -479,7 +473,7 @@ def main (): tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cumul_gain_AGCO2_all_types))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_cont_eco_processed))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_WHRC_biomass_2000_unmasked))) - tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) + # tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_mangrove_biomass_2000))) tiles_to_delete.extend(glob.glob('*{}*tif'.format(cn.pattern_removal_forest_type))) uu.print_log(" Deleting", len(tiles_to_delete), "tiles...") diff --git a/sensitivity_analysis/Mekong_loss.py b/sensitivity_analysis/Mekong_loss.py index 38f228ad..344b8544 100644 --- a/sensitivity_analysis/Mekong_loss.py +++ b/sensitivity_analysis/Mekong_loss.py @@ -27,7 +27,7 @@ def recode_tiles(annual_loss): recoded_output = "Mekong_loss_recoded_{}.tif".format(year) outfile = '--outfile={}'.format(recoded_output) - cmd = ['gdal_calc.py', '-A', annual_loss, calc, outfile, '--NoDataValue=0', '--co', 'COMPRESS=LZW', '--quiet'] + cmd = ['gdal_calc.py', '-A', annual_loss, calc, outfile, '--NoDataValue=0', '--co', 'COMPRESS=DEFLATE', '--quiet'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: diff --git a/sensitivity_analysis/US_removal_rates.py b/sensitivity_analysis/US_removal_rates.py index 036c0a0d..cd90fbd4 100644 --- a/sensitivity_analysis/US_removal_rates.py +++ b/sensitivity_analysis/US_removal_rates.py @@ -78,7 +78,7 @@ def US_removal_rate_calc(tile_id, gain_table_group_region_age_dict, gain_table_g kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) diff --git a/sensitivity_analysis/legal_AMZ_loss.py b/sensitivity_analysis/legal_AMZ_loss.py index 32aa4433..2a3d6d87 100644 --- a/sensitivity_analysis/legal_AMZ_loss.py +++ b/sensitivity_analysis/legal_AMZ_loss.py @@ -51,7 +51,7 @@ def legal_Amazon_forest_age_category(tile_id, sensit_type, output_pattern): kwargs.update( driver='GTiff', count=1, - compress='lzw', + compress='DEFLATE', nodata=0 ) @@ -121,7 +121,7 @@ def legal_Amazon_create_gain_year_count_loss_only(tile_id, sensit_type): loss_outfilename = '{}_growth_years_loss_only.tif'.format(tile_id) loss_outfilearg = '--outfile={}'.format(loss_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_calc, loss_outfilearg, - '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -156,7 +156,7 @@ def legal_Amazon_create_gain_year_count_no_change(tile_id, sensit_type): no_change_outfilename = '{}_growth_years_no_change.tif'.format(tile_id) no_change_outfilearg = '--outfile={}'.format(no_change_outfilename) cmd = ['gdal_calc.py', '-A', loss_vrt, '-B', extent, '-C', biomass, no_change_calc, - no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + no_change_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -182,7 +182,7 @@ def legal_Amazon_create_gain_year_count_loss_and_gain_standard(tile_id, sensit_t loss_and_gain_outfilename = '{}_growth_years_loss_and_gain.tif'.format(tile_id) loss_and_gain_outfilearg = '--outfile={}'.format(loss_and_gain_outfilename) cmd = ['gdal_calc.py', '-A', loss, '-B', gain, '-C', extent, loss_and_gain_calc, - loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--type', 'Byte', '--quiet'] + loss_and_gain_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--type', 'Byte', '--quiet'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -208,7 +208,7 @@ def legal_Amazon_create_gain_year_count_merge(tile_id, output_pattern): # All four components are merged together to the final output raster age_outfile = '{}_{}.tif'.format(tile_id, output_pattern) cmd = ['gdal_merge.py', '-o', age_outfile, loss_outfilename, no_change_outfilename, loss_and_gain_outfilename, - '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte'] + '-co', 'COMPRESS=DEFLATE', '-a_nodata', '0', '-ot', 'Byte'] # Solution for adding subprocess output to log is from https://stackoverflow.com/questions/21953835/run-subprocess-and-print-output-to-logging process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: diff --git a/sensitivity_analysis/mp_Mekong_loss.py b/sensitivity_analysis/mp_Mekong_loss.py index 04a3bf19..c282ac82 100644 --- a/sensitivity_analysis/mp_Mekong_loss.py +++ b/sensitivity_analysis/mp_Mekong_loss.py @@ -45,7 +45,7 @@ def main (): # the earlier loss gets) uu.print_log("Merging all loss years within model range...") loss_composite = "Mekong_loss_2001_2015.tif" - cmd = ['gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-ot', 'Byte', + cmd = ['gdal_merge.py', '-o', loss_composite, '-co', 'COMPRESS=DEFLATE', '-a_nodata', '0', '-ot', 'Byte', "Mekong_loss_recoded_2015.tif", "Mekong_loss_recoded_2014.tif", "Mekong_loss_recoded_2013.tif", "Mekong_loss_recoded_2012.tif", "Mekong_loss_recoded_2011.tif", "Mekong_loss_recoded_2010.tif", "Mekong_loss_recoded_2009.tif", "Mekong_loss_recoded_2008.tif", "Mekong_loss_recoded_2007.tif", diff --git a/sensitivity_analysis/mp_legal_AMZ_loss.py b/sensitivity_analysis/mp_legal_AMZ_loss.py index 86037ee5..c9d34393 100644 --- a/sensitivity_analysis/mp_legal_AMZ_loss.py +++ b/sensitivity_analysis/mp_legal_AMZ_loss.py @@ -96,7 +96,7 @@ def main (): # This merges all six rasters together, so it takes a lot of memory and time. It seems to repeatedly max out # at about 300 GB as it progresses abot 15% each time; then the memory drops back to 0 and slowly increases. cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_forest_extent_2000_merged), - '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), + '-co', 'COMPRESS=DEFLATE', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), raw_forest_extent_inputs[0], raw_forest_extent_inputs[1], raw_forest_extent_inputs[2], raw_forest_extent_inputs[3], raw_forest_extent_inputs[4], raw_forest_extent_inputs[5]] uu.log_subprocess_output_full(cmd) @@ -149,7 +149,7 @@ def main (): # This took about 8 minutes. uu.print_log("Merging input loss rasters into a composite for all years...") cmd = ['gdal_merge.py', '-o', '{}.tif'.format(cn.pattern_Brazil_annual_loss_merged), - '-co', 'COMPRESS=LZW', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), + '-co', 'COMPRESS=DEFLATE', '-a_nodata', '0', '-n', '0', '-ot', 'Byte', '-ps', '{}'.format(pixelSizeX), '{}'.format(pixelSizeY), 'Prodes2019_annual_loss_2008_2019.tif', 'Prodes2014_annual_loss_2001_2007.tif'] uu.log_subprocess_output_full(cmd) uu.print_log(" Loss rasters combined into composite") diff --git a/universal_util.py b/universal_util.py index 3b26b30c..1faa6024 100644 --- a/universal_util.py +++ b/universal_util.py @@ -997,7 +997,7 @@ def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): out_tile = '{0}_{1}.tif'.format(tile_id, out_pattern) - cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', + cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-ot', dt, '-overwrite', source_raster, out_tile] process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -1008,7 +1008,7 @@ def mp_warp_to_Hansen(tile_id, source_raster, out_pattern, dt, no_upload): def warp_to_Hansen(in_file, out_file, xmin, ymin, xmax, ymax, dt): - cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=LZW', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', + cmd = ['gdalwarp', '-t_srs', 'EPSG:4326', '-co', 'COMPRESS=DEFLATE', '-tr', str(cn.Hansen_res), str(cn.Hansen_res), '-tap', '-te', str(xmin), str(ymin), str(xmax), str(ymax), '-dstnodata', '0', '-ot', dt, '-overwrite', in_file, out_file] process = Popen(cmd, stdout=PIPE, stderr=STDOUT) with process.stdout: @@ -1017,7 +1017,7 @@ def warp_to_Hansen(in_file, out_file, xmin, ymin, xmax, ymax, dt): # Rasterizes the shapefile within the bounding coordinates of a tile def rasterize(in_shape, out_tif, xmin, ymin, xmax, ymax, blocksizex, blocksizey, tr=None, ot=None, name_field=None, anodata=None): - cmd = ['gdal_rasterize', '-co', 'COMPRESS=LZW', + cmd = ['gdal_rasterize', '-co', 'COMPRESS=DEFLATE', # Input raster is ingested as 1024x1024 pixel tiles (rather than the default of 1 pixel wide strips '-co', 'TILED=YES', '-co', 'BLOCKXSIZE={}'.format(blocksizex), '-co', 'BLOCKYSIZE={}'.format(blocksizey), @@ -1066,7 +1066,7 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): # If the tile is already on the spot machine, it uses the downloaded tile. if os.path.exists(os.path.join(folder, '{0}_{1}.tif'.format(cn.pattern_loss, tile_id))): print_log("Hansen loss tile exists for {}. Using that as template for blank tile.".format(tile_id)) - cmd = ['gdal_merge.py', '-createonly', '-init', '0', '-co', 'COMPRESS=LZW', '-ot', 'Byte', + cmd = ['gdal_merge.py', '-createonly', '-init', '0', '-co', 'COMPRESS=DEFLATE', '-ot', 'Byte', '-o', '{0}{1}_{2}.tif'.format(folder, tile_id, pattern), '{0}{1}_{2}.tif'.format(folder, cn.pattern_loss, tile_id)] check_call(cmd) @@ -1084,7 +1084,7 @@ def make_blank_tile(tile_id, pattern, folder, sensit_type): # Uses either the Hansen loss tile or pixel area tile as a template tile, # with the output name corresponding to the model type - cmd = ['gdal_merge.py', '-createonly', '-init', '0', '-co', 'COMPRESS=LZW', '-ot', 'Byte', + cmd = ['gdal_merge.py', '-createonly', '-init', '0', '-co', 'COMPRESS=DEFLATE', '-ot', 'Byte', '-o', '{0}/{1}_{2}.tif'.format(folder, tile_id, full_pattern), '{0}/{1}_{2}.tif'.format(folder, tile_id, 'empty_tile_template')] check_call(cmd) @@ -1161,7 +1161,7 @@ def mask_pre_2000_plantation(pre_2000_plant, tile_to_mask, out_name, tile_id): calc = '--calc=A*(B==0)' loss_outfilearg = '--outfile={}'.format(out_name) cmd = ['gdal_calc.py', '-A', tile_to_mask, '-B', pre_2000_vrt, - calc, loss_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=LZW', '--quiet'] + calc, loss_outfilearg, '--NoDataValue=0', '--overwrite', '--co', 'COMPRESS=DEFLATE', '--quiet'] check_call(cmd) # Basically, does nothing if there is no pre-2000 plantation and the output name is the same as the @@ -1381,7 +1381,7 @@ def rewindow(tile_id, download_pattern_name, no_upload): # Just using gdalwarp inflated the output rasters about 10x, even with COMPRESS=LZW. # Solution was to use gdal_translate instead, although, for unclear reasons, this still inflates the size - # of the pixel area tiles but not other tiles. + # of the pixel area tiles but not other tiles using LZW. DEFLATE makes all outputs smaller. cmd = ['gdal_translate', '-co', 'COMPRESS=DEFLATE', '-co', 'TILED=YES', '-co', 'BLOCKXSIZE={}'.format(cn.agg_pixel_window), '-co', 'BLOCKYSIZE={}'.format(cn.agg_pixel_window), in_tile, out_tile]