Skip to content

Commit

Permalink
made some maps
Browse files Browse the repository at this point in the history
  • Loading branch information
anna-boser committed May 29, 2024
1 parent 875caec commit 32c0da6
Show file tree
Hide file tree
Showing 13 changed files with 389 additions and 98 deletions.
50 changes: 50 additions & 0 deletions code/0_process_data/0_subset_CPIS.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import yaml
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import utils

# Load the configuration file
with open('config.yaml', 'r') as f:
Expand All @@ -12,10 +15,57 @@
CPIS2021 = gpd.read_file(config["CPIS_2021_shp_path"])
Africa = gpd.read_file(config["Africa_countries_shp_path"])

# Harmonise the country columns: "NAME_0" -> "Country" and "ISO" -> "Country Code".
# Shapefile field names are limited to 10 characters, so 'Country Code' is written
# to disk as 'Country Co'. Because this script overwrites the Africa shapefile
# below, a re-run reads the truncated name back; map it to the full name as well so
# the in-memory column is the same on every run.
Africa = Africa.rename(columns={
    'NAME_0': 'Country',
    'ISO': 'Country Code',
    'Country Co': 'Country Code',
})

# Remove the 'OBJECTID', 'Continent', 'REgion' columns.
# errors='ignore' keeps the script re-runnable: after the first run the saved
# shapefile no longer contains these columns and a plain drop would raise KeyError.
Africa = Africa.drop(columns=['OBJECTID', 'Continent', 'REgion'], errors='ignore')

# Save this shapefile
# NOTE(review): this overwrites the raw boundaries file in place; consider writing
# the cleaned copy to a separate (processed) path instead.
Africa.to_file(config["Africa_countries_shp_path"])

# Perform the spatial intersection (attaches the country columns to every CPIS feature)
CPIS2000_Africa = gpd.overlay(CPIS2000, Africa, how='intersection')
CPIS2021_Africa = gpd.overlay(CPIS2021, Africa, how='intersection')

# Save the intersected shapefiles
CPIS2000_Africa.to_file(config["Africa_CPIS_2000_shp_path"])
CPIS2021_Africa.to_file(config["Africa_CPIS_2021_shp_path"])

# Shortcut: uncomment to reload the previously saved results instead of
# recomputing the overlay above.
# CPIS2000_Africa = gpd.read_file(config["Africa_CPIS_2000_shp_path"])
# CPIS2021_Africa = gpd.read_file(config["Africa_CPIS_2021_shp_path"])
# Africa = gpd.read_file(config["Africa_countries_shp_path"])

# Add a 'Year' column to each dataframe
CPIS2000_Africa['Year'] = 2000
CPIS2021_Africa['Year'] = 2021

# Stack the two years into a single dataframe
CPIS_Africa = gpd.GeoDataFrame(pd.concat([CPIS2000_Africa, CPIS2021_Africa], ignore_index=True))

# Save the merged shapefile (before the geometry is buffered for plotting below)
CPIS_Africa.to_file(config["Africa_CPIS_shp_path"])

# Buffer the geometry to make it more visible on a continent-scale map.
# This is for display only; the saved shapefiles keep the original geometry.
CPIS_Africa['geometry'] = CPIS_Africa.geometry.buffer(0.1)

# Create a single figure with one panel per year
fig, axes = plt.subplots(1, 2, figsize=(20, 10))
for ax, year in zip(axes, (2000, 2021)):
    Africa.boundary.plot(ax=ax, color='black')
    CPIS_Africa[CPIS_Africa['Year'] == year].plot(ax=ax, color='green')
    ax.set_title(f'CPIS in {year} in Africa')

plt.tight_layout()

# Save before showing: plt.show() blocks, and the window may be closed before the
# figure is written.
fig.savefig(utils.make_output_path('CPIS_2000_2021_Africa.png'))
plt.show()
35 changes: 35 additions & 0 deletions code/0_process_data/1_government_effectiveness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Add the government effectiveness index to the CPIS data

import geopandas as gpd
import pandas as pd
import utils
import yaml

# Load the configuration file
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Read in the government effectiveness index (GEI) data
GEI = pd.read_csv(config["government_effectiveness_path"])

# Rename the GEI "Country Name" column to "Country" to match the CPIS data
GEI = GEI.rename(columns={'Country Name': 'Country'})

# Reshape from wide (one column per year) to long (one row per country-year)
GEI = pd.melt(GEI, id_vars=['Country', 'Country Code'], var_name='Year', value_name='GEI')

# Convert the year column to integer so it matches the integer 'Year' in the CPIS data
GEI['Year'] = GEI['Year'].astype(int)

# Add a two year and four year lag GEI variable to the GEI data
GEI = utils.create_lagged_column(GEI, 'GEI', 2)
GEI = utils.create_lagged_column(GEI, 'GEI', 4)

# Read in the combined 2000 and 2021 CPIS data
CPIS = gpd.read_file(config["Africa_CPIS_shp_path"])

# Shapefile field names are capped at 10 characters, so 'Country Code' was stored
# as 'Country Co' when the CPIS shapefile was written. Restore the full name,
# otherwise the merge below fails with a KeyError on 'Country Code'.
CPIS = CPIS.rename(columns={'Country Co': 'Country Code'})

# Add GEI information to the CPIS data by matching on year and country.
# NOTE(review): this is an inner merge, so CPIS rows whose country name/code has no
# exact match in the GEI table are silently dropped - confirm the row count is
# unchanged after the merge.
CPIS = CPIS.merge(GEI, on=['Year', 'Country', 'Country Code'])

# Save the CPIS data with GEI information
CPIS.to_file(config["Africa_CPIS_GEI_shp_path"])
63 changes: 0 additions & 63 deletions code/0_process_data/1_resample_CPIS.py

This file was deleted.

97 changes: 97 additions & 0 deletions code/0_process_data/2_resample_CPIS.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# This script counts the number of CPIS in the year 2000 and 2021 for each country in Africa.
# It then produces a map of the % change in CPIS between 2000 and 2021.

import yaml
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import utils

# Load the configuration file
with open('config.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Read the shapefiles
CPIS2000_Africa = gpd.read_file(config["Africa_CPIS_2000_shp_path"])
CPIS2021_Africa = gpd.read_file(config["Africa_CPIS_2021_shp_path"])


def _count_by_country(cpis, count_col):
    """Return a DataFrame with one row per country: 'Country' and the CPIS count in `count_col`."""
    # The country column is 'Country' (not 'NAME_0'): 0_subset_CPIS.py renames it
    # before writing the shapefiles read above.
    counts = cpis['Country'].value_counts().reset_index()
    counts.columns = ['Country', count_col]
    return counts


# Count the number of CPIS in each year for each country and combine the counts.
# NOTE(review): this is an inner merge, so a country with CPIS in only one of the
# two years does not appear in the result.
country_counts = _count_by_country(CPIS2000_Africa, 'CPIS_2000').merge(
    _count_by_country(CPIS2021_Africa, 'CPIS_2021'), on='Country'
)

# Calculate the % change in CPIS between 2000 and 2021.
# CPIS_2000 is at least 1 for every row (it comes from value_counts on existing
# rows), so the division is safe.
country_counts['change'] = (
    (country_counts['CPIS_2021'] - country_counts['CPIS_2000'])
    / country_counts['CPIS_2000']
    * 100
)

# Save the country_counts dataframe
country_counts.to_csv(config['country_CPIS_count'], index=False)

# Merge the counts with the Africa shapefile
Africa = gpd.read_file(config["Africa_countries_shp_path"])
Africa_counts = Africa.merge(country_counts, on='Country')


def _save_country_map(column, title, file_name):
    """Draw a choropleth of `column` over the Africa boundaries, save it, then show it."""
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    Africa.boundary.plot(ax=ax, color='black')
    Africa_counts.plot(column=column, cmap='Blues', legend=True, ax=ax)
    ax.set_title(title)
    # Save before showing: plt.show() blocks until the window is closed.
    fig.savefig(utils.make_output_path(file_name))
    plt.show()


# Map the change in CPIS between 2000 and 2021, and the counts for each year
_save_country_map('change', 'Percent change in CPIS between 2000 and 2021', "CPIS_change_country_map.png")
_save_country_map('CPIS_2000', 'Number of CPIS in 2000', "CPIS_2000_country_map.png")
_save_country_map('CPIS_2021', 'Number of CPIS in 2021', "CPIS_2021_country_map.png")

# Plot both 2000 and 2021 CPIS side by side on a shared colour scale, so the two
# panels are directly comparable. vmin/vmax are passed straight to the plot call.
vmin = min(Africa_counts['CPIS_2000'].min(), Africa_counts['CPIS_2021'].min())
vmax = max(Africa_counts['CPIS_2000'].max(), Africa_counts['CPIS_2021'].max())

fig, axes = plt.subplots(1, 2, figsize=(20, 10))
for ax, year in zip(axes, (2000, 2021)):
    Africa.boundary.plot(ax=ax, color='black')
    Africa_counts.plot(column=f'CPIS_{year}', cmap='Blues', vmin=vmin, vmax=vmax, legend=True, ax=ax)
    ax.set_title(f'Number of CPIS in {year}')

# Adjust layout
plt.tight_layout()

# Save the figure in the output folder (like the other maps), then show it
fig.savefig(utils.make_output_path('CPIS_2000_2021_country_map.png'))
plt.show()
174 changes: 146 additions & 28 deletions code/0_process_data/scratch.ipynb

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions code/0_process_data/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# This file contains various functions used multiple times throughout the project. It is imported in other scripts to avoid code duplication.
import os
import inspect

def make_output_path(file_name):
    """
    Build the path at which the calling script should save an output file.

    The output folder mirrors the code folder: for a caller located at
    ``<root>/code/<sub>/<script>.py`` the returned path is
    ``<root>/output/<sub>/<script>/<file_name>``. The directory is created if it
    does not exist yet.

    Only the last parent directory named exactly "code" is swapped for "output".
    Other path components that merely contain the text "code" (e.g. "vscode") and
    the script's own name are left untouched. If no parent directory is named
    "code", the folder is created next to the calling script.

    Parameters:
    file_name (str): Name of the file to be saved (e.g. 'map.png').
    Returns:
    str: The full path, inside the output folder, for `file_name`.
    """
    # Frame 1 of the stack is the code that called this function
    caller_script = inspect.stack()[1].filename

    # Drop the extension: each script gets an output folder named after itself
    script_stem = os.path.splitext(caller_script)[0]

    parts = os.path.normpath(script_stem).split(os.sep)
    parent_dirs = parts[:-1]  # exclude the script name itself from the search
    if "code" in parent_dirs:
        last_code = len(parent_dirs) - 1 - parent_dirs[::-1].index("code")
        parts[last_code] = "output"
    output_dir = os.sep.join(parts)

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_dir, exist_ok=True)

    return os.path.join(output_dir, file_name)


def create_lagged_column(df, value_col, lag, new_col_name=None):
    """
    Create a lagged column for a given variable in the DataFrame.

    For every row, the new column holds the value of `value_col` for the same
    'Country' in year ``Year - lag``. Rows with no matching (Country, Year - lag)
    entry get a missing value. If several rows share the same country and year,
    the first one in the DataFrame is used.

    Parameters:
    df (pd.DataFrame): The input DataFrame. Must contain 'Country' and 'Year' columns.
    value_col (str): The name of the column containing the values to be lagged.
    lag (int): The number of years to lag.
    new_col_name (str): The name of the new lagged column. If None, defaults to 'value_col_lagX'.
    Returns:
    pd.DataFrame: A copy of the DataFrame with the new lagged column (the input is not modified).
    """
    df = df.copy()
    if new_col_name is None:
        new_col_name = f'{value_col}_lag{lag}'

    # Build the (country, year) -> value lookup in one pass instead of filtering
    # the whole DataFrame once per row. setdefault keeps the first occurrence.
    first_value = {}
    for country, year, value in zip(df['Country'], df['Year'], df[value_col]):
        first_value.setdefault((country, year), value)

    # Missing lags come back as None from .get(); the column is assigned in one go
    # so pandas can infer a numeric dtype rather than keeping an object column.
    df[new_col_name] = [
        first_value.get((country, year - lag))
        for country, year in zip(df['Country'], df['Year'])
    ]
    return df
1 change: 0 additions & 1 deletion code/utils.py

This file was deleted.

10 changes: 4 additions & 6 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,10 @@ CPIS_2021_shp_path: 'data/raw/CPIS/World_CPIS_2021.shp' # from https://github.co
Africa_countries_shp_path: 'data/raw/Africa_Boundaries-shp/Africa_Boundaries.shp' # from https://hub.arcgis.com/datasets/07610d73964e4d39ab62c4245d548625/explore
Africa_CPIS_2000_shp_path: 'data/raw/CPIS/Africa_CPIS_2000.shp' # same as CPIS_2000_shp_path but filtered to Africa
Africa_CPIS_2021_shp_path: 'data/raw/CPIS/Africa_CPIS_2021.shp' # same as CPIS_2021_shp_path but filtered to Africa
government_effectiveness_path: 'data/raw/government_effectiveness/government_effectiveness.csv' # from https://data.worldbank.org/indicator/GE.EST. Renamed and header removed
Africa_CPIS_shp_path: 'data/raw/CPIS/Africa_CPIS.shp' # merged Africa_CPIS_2000_shp_path and Africa_CPIS_2021_shp_path
government_effectiveness_path: 'data/raw/government_effectiveness/government_effectiveness.csv' # from https://data.worldbank.org/indicator/GE.EST. Renamed, header removed, empty and irrelevant columns (e.g. what the value is) removed.
SPEI_12_path: 'data/raw/SPEI_drought/spei12.nc' # from https://data.ceda.ac.uk/neodc/spei_africa/data. This one is at a 12 month scale.

# processed data

# outputs
CPIS_change_country_map_path: 'outputs/CPIS_change_country_map.pdf'
CPIS_2000_country_map_path: 'outputs/CPIS_2000_country_map.pdf'
CPIS_2021_country_map_path: 'outputs/CPIS_2021_country_map.pdf'
country_CPIS_count: 'data/processed/country_CPIS_count.csv' # number of CPIS points per country
Africa_CPIS_GEI_shp_path: 'data/processed/CPIS/Africa_CPIS_GEI.shp' # Africa_CPIS_shp_path with government effectiveness data
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 32c0da6

Please sign in to comment.