made some maps

anna-boser · May 29, 2024 · 32c0da6 · 32c0da6
1 parent 875caec
commit 32c0da6
Show file tree

Hide file tree

Showing 13 changed files with 389 additions and 98 deletions.
diff --git a/code/0_process_data/0_subset_CPIS.py b/code/0_process_data/0_subset_CPIS.py
@@ -2,6 +2,9 @@
 
 import yaml
 import geopandas as gpd
+import pandas as pd
+import matplotlib.pyplot as plt
+import utils
 
 # Load the configuration file
 with open('config.yaml', 'r') as f:
@@ -12,10 +15,57 @@
 CPIS2021 = gpd.read_file(config["CPIS_2021_shp_path"])
 Africa = gpd.read_file(config["Africa_countries_shp_path"])
 
+# Standardise the country columns of the Africa shapefile: "NAME_0" -> "Country" and "ISO" -> "Country Code".
+# Shapefile field names are limited to 10 characters, so "Country Code" is stored on disk as "Country Co".
+# This script saves the cleaned file back to the path it was read from (see below), so on a re-run the
+# column arrives as "Country Co" rather than "ISO"; it is mapped back too so that every run works with the
+# same in-memory column names.
+Africa = Africa.rename(columns={'NAME_0': 'Country', 'ISO': 'Country Code', 'Country Co': 'Country Code'})
+
+# Remove the 'OBJECTID', 'Continent', 'REgion' columns.
+# errors='ignore' makes the script re-runnable: after the first run these columns are already gone from
+# the saved file and a plain drop would raise a KeyError.
+Africa = Africa.drop(columns=['OBJECTID', 'Continent', 'REgion'], errors='ignore')
+
+# Save this shapefile (this overwrites the file that was read above)
+Africa.to_file(config["Africa_countries_shp_path"])
+
 # Perform the spatial intersection
 CPIS2000_Africa = gpd.overlay(CPIS2000, Africa, how='intersection')
 CPIS2021_Africa = gpd.overlay(CPIS2021, Africa, how='intersection')
 
 # Save the intersected shapefile
 CPIS2000_Africa.to_file(config["Africa_CPIS_2000_shp_path"])
 CPIS2021_Africa.to_file(config["Africa_CPIS_2021_shp_path"])
+
+# # Uncomment to skip the overlay above and read the previously saved shapefiles instead
+# CPIS2000_Africa = gpd.read_file(config["Africa_CPIS_2000_shp_path"])
+# CPIS2021_Africa = gpd.read_file(config["Africa_CPIS_2021_shp_path"])
+# Africa = gpd.read_file(config["Africa_countries_shp_path"])
+
+# Add a 'Year' column to each dataframe so the two can be told apart after merging
+CPIS2000_Africa['Year'] = 2000
+CPIS2021_Africa['Year'] = 2021
+
+# Stack the two dataframes into one
+CPIS_Africa = gpd.GeoDataFrame(pd.concat([CPIS2000_Africa, CPIS2021_Africa], ignore_index=True))
+
+# Save the merged shapefile (done before buffering, so the saved geometries are the original ones)
+CPIS_Africa.to_file(config["Africa_CPIS_shp_path"])
+
+# Buffer the geometry to make it more visible on a continent-scale map.
+# NOTE(review): 0.1 is in the units of the layer's CRS (degrees if the data is in a geographic CRS) - confirm.
+CPIS_Africa['geometry'] = CPIS_Africa.geometry.buffer(0.1)
+
+# Create a single figure with one facet per year
+fig, axes = plt.subplots(1, 2, figsize=(20, 10))
+
+# Plot the CPIS data of each year on top of the country outlines
+for ax, year in zip(axes, (2000, 2021)):
+    Africa.boundary.plot(ax=ax, color='black')
+    CPIS_Africa[CPIS_Africa['Year'] == year].plot(ax=ax, color='green')
+    ax.set_title(f'CPIS in {year} in Africa')
+
+# Adjust layout
+plt.tight_layout()
+
+# Save the combined figure before showing it, so the file is written even if the
+# (blocking) plot window is never closed cleanly
+fig.savefig(utils.make_output_path('CPIS_2000_2021_Africa.png'))
+
+# Show the plot
+plt.show()
diff --git a/code/0_process_data/1_government_effectiveness.py b/code/0_process_data/1_government_effectiveness.py
@@ -0,0 +1,35 @@
+# Add the government effectiveness index (GEI) to the CPIS data
+
+import os
+
+import geopandas as gpd
+import pandas as pd
+import utils
+import yaml
+
+# Load the configuration file
+with open('config.yaml', 'r') as f:
+    config = yaml.safe_load(f)
+
+# read in the government effectiveness index data
+GEI = pd.read_csv(config["government_effectiveness_path"])
+
+# Rename the GEI "Country Name" column to "Country" to match the CPIS data
+GEI = GEI.rename(columns={'Country Name': 'Country'})
+
+# Reshape the GEI data from wide (one column per year) to long (one row per country and year)
+GEI = pd.melt(GEI, id_vars=['Country', 'Country Code'], var_name='Year', value_name='GEI')
+
+# Convert the year column to integer so it can be matched against the integer 'Year' of the CPIS data
+GEI['Year'] = GEI['Year'].astype(int)
+
+# Add a two year and four year lag GEI variable to the GEI data
+GEI = utils.create_lagged_column(GEI, 'GEI', 2)
+GEI = utils.create_lagged_column(GEI, 'GEI', 4)
+
+# read in the merged 2000 and 2021 cpis data
+CPIS = gpd.read_file(config["Africa_CPIS_shp_path"])
+
+# Shapefile field names are limited to 10 characters, so the 'Country Code' column written by
+# 0_subset_CPIS.py is stored as 'Country Co'. Restore the full name, otherwise the merge below
+# fails because the key is missing. (No-op if the column already has its full name.)
+CPIS = CPIS.rename(columns={'Country Co': 'Country Code'})
+
+# Add GEI information to the CPIS data by matching on year and country.
+# This is an inner join: CPIS rows without a matching GEI row are dropped.
+CPIS = CPIS.merge(GEI, on=['Year', 'Country', 'Country Code'])
+
+# Make sure the output folder exists
+output_dir = os.path.dirname(config["Africa_CPIS_GEI_shp_path"])
+if output_dir:
+    os.makedirs(output_dir, exist_ok=True)
+
+# Save the CPIS data with GEI information
+CPIS.to_file(config["Africa_CPIS_GEI_shp_path"])
diff --git a/code/0_process_data/1_resample_CPIS.py b/code/0_process_data/1_resample_CPIS.py
diff --git a/code/0_process_data/2_resample_CPIS.py b/code/0_process_data/2_resample_CPIS.py
@@ -0,0 +1,97 @@
+# This script counts the number of CPIS in the year 2000 and 2021 for each country in Africa.
+# It then produces maps of the counts and of the % change in CPIS between 2000 and 2021.
+
+import os
+
+import yaml
+import geopandas as gpd
+import pandas as pd
+import matplotlib.colors as mcolors
+import matplotlib.pyplot as plt
+import utils
+
+# Load the configuration file
+with open('config.yaml', 'r') as f:
+    config = yaml.safe_load(f)
+
+# Read the shapefiles
+CPIS2000_Africa = gpd.read_file(config["Africa_CPIS_2000_shp_path"])
+CPIS2021_Africa = gpd.read_file(config["Africa_CPIS_2021_shp_path"])
+Africa = gpd.read_file(config["Africa_countries_shp_path"])
+
+# 0_subset_CPIS.py renames the country name column from "NAME_0" to "Country" before saving these files.
+# Apply the same rename here (a no-op when it has already happened) so the script works with shapefiles
+# written either before or after that change.
+CPIS2000_Africa = CPIS2000_Africa.rename(columns={'NAME_0': 'Country'})
+CPIS2021_Africa = CPIS2021_Africa.rename(columns={'NAME_0': 'Country'})
+Africa = Africa.rename(columns={'NAME_0': 'Country'})
+
+# Count the number of CPIS in 2000 for each country
+country_counts_2000 = CPIS2000_Africa['Country'].value_counts().reset_index() # Convert the Series to a DataFrame
+country_counts_2000.columns = ['Country', 'CPIS_2000'] # Rename the columns
+
+# Repeat the process for 2021
+country_counts_2021 = CPIS2021_Africa['Country'].value_counts().reset_index() # Convert the Series to a DataFrame
+country_counts_2021.columns = ['Country', 'CPIS_2021'] # Rename the columns
+
+# Combine the counts into a single dataframe by merging on the country name.
+# This is an inner join, so only countries with at least one CPIS in both years are kept.
+country_counts = country_counts_2000.merge(country_counts_2021, on='Country')
+
+# Calculate the % change in CPIS between 2000 and 2021.
+# The denominator is never zero because of the inner join above.
+country_counts['change'] = (
+    (country_counts['CPIS_2021'] - country_counts['CPIS_2000']) / country_counts['CPIS_2000'] * 100
+)
+
+# save the country_counts dataframe (creating the folder if it does not exist yet)
+counts_dir = os.path.dirname(config['country_CPIS_count'])
+if counts_dir:
+    os.makedirs(counts_dir, exist_ok=True)
+country_counts.to_csv(config['country_CPIS_count'], index=False)
+
+# Merge the counts with the Africa shapefile
+Africa_counts = Africa.merge(country_counts, on='Country')
+
+
+def plot_country_map(column, title, file_name):
+    """Map one column of Africa_counts over the country outlines, save the figure to the output folder and show it."""
+    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+    Africa.boundary.plot(ax=ax, color='black')
+    Africa_counts.plot(column=column, cmap='Blues', legend=True, ax=ax)
+    ax.set_title(title)
+    # Save before showing so the file is written even if the (blocking) plot window is not closed cleanly
+    fig.savefig(utils.make_output_path(file_name))
+    plt.show()
+
+
+# Plot the % change in CPIS between 2000 and 2021
+plot_country_map('change', '% change in CPIS between 2000 and 2021', "CPIS_change_country_map.png")
+
+# Map the number of CPIS in 2000
+plot_country_map('CPIS_2000', 'Number of CPIS in 2000', "CPIS_2000_country_map.png")
+
+# Map the number of CPIS in 2021
+plot_country_map('CPIS_2021', 'Number of CPIS in 2021', "CPIS_2021_country_map.png")
+
+# Plot both 2000 and 2021 CPIS side by side on a shared colour scale
+# Create a normalization instance with the min and max values across both columns
+vmin = min(Africa_counts['CPIS_2000'].min(), Africa_counts['CPIS_2021'].min())
+vmax = max(Africa_counts['CPIS_2000'].max(), Africa_counts['CPIS_2021'].max())
+norm = mcolors.Normalize(vmin=vmin, vmax=vmax)
+
+# Create subplots
+fig, axes = plt.subplots(1, 2, figsize=(20, 10))
+
+# Plot the number of CPIS in each year
+for ax, year in zip(axes, (2000, 2021)):
+    Africa.boundary.plot(ax=ax, color='black')
+    Africa_counts.plot(column=f'CPIS_{year}', cmap='Blues', norm=norm, legend=True, ax=ax)
+    ax.set_title(f'Number of CPIS in {year}')
+
+# Adjust layout
+plt.tight_layout()
+
+# Save the figure in the output folder, like the other figures of this script
+fig.savefig(utils.make_output_path('CPIS_2000_2021_country_map.png'))
+
+# Show the combined plot
+plt.show()
diff --git a/code/0_process_data/scratch.ipynb b/code/0_process_data/scratch.ipynb
diff --git a/code/0_process_data/utils.py b/code/0_process_data/utils.py
@@ -0,0 +1,57 @@
+# This file contains various functions used multiple times throughout the project. It is imported in other scripts to avoid code duplication.
+import os
+import inspect
+
+def make_output_path(file_name, script_path=None):
+    """
+    Create the path at which a script should save a file, inside an output folder organized in the same way as the code folder.
+
+    The output folder is the path of the script without its extension, with the innermost
+    enclosing folder named exactly "code" replaced by "output". For example, a script at
+    code/0_process_data/0_subset_CPIS.py saves to output/0_process_data/0_subset_CPIS/.
+    If no enclosing folder is named "code", the folder is created next to the script.
+    The folder is created if it does not exist.
+
+    Parameters:
+    file_name (str): The name of the file to save, e.g. 'map.png'.
+    script_path (str): The path of the script the output belongs to. If None, defaults to the
+        file of the code that called this function. Pass it explicitly when that file is not
+        meaningful (e.g. when calling from a notebook).
+
+    Returns:
+    str: The path of file_name inside the output folder.
+    """
+    if script_path is None:
+        # stack()[0] is this function; stack()[1] is the code that called it
+        script_path = inspect.stack()[1].filename
+
+    # remove the extension and split the path into its components
+    parts = os.path.normpath(os.path.splitext(script_path)[0]).split(os.sep)
+
+    # Replace only the innermost *folder* named exactly "code". A plain string replace would also
+    # rewrite other parts of the path that merely contain "code" (e.g. a "decoder" folder) as well
+    # as outer folders named "code". The last component is the script name, so it is left untouched.
+    folders = parts[:-1]
+    if "code" in folders:
+        innermost = len(folders) - 1 - folders[::-1].index("code")
+        parts[innermost] = "output"
+    output_dir = os.sep.join(parts)
+
+    # create the output folder if it does not exist
+    os.makedirs(output_dir, exist_ok=True)
+
+    return os.path.join(output_dir, file_name)
+
+
+def create_lagged_column(df, value_col, lag, new_col_name=None):
+    """
+    Create a lagged column for a given variable in the DataFrame.
+
+    For every row, the new column holds the value of value_col found for the same 'Country'
+    in the year 'Year' - lag. Rows for which no such earlier row exists get a missing value (NaN).
+    If several rows share the same country and year, the first one is used.
+
+    Parameters:
+    df (pd.DataFrame): The input DataFrame. Must contain the columns 'Country', 'Year' and value_col.
+    value_col (str): The name of the column containing the values to be lagged.
+    lag (int): The number of years to lag.
+    new_col_name (str): The name of the new lagged column. If None, defaults to 'value_col_lagX'.
+
+    Returns:
+    pd.DataFrame: A copy of the DataFrame with the new lagged column. The input is not modified.
+    """
+    if new_col_name is None:
+        new_col_name = f'{value_col}_lag{lag}'
+
+    result = df.copy()
+
+    # Build a lookup table of (Country, Year) -> value. Rows with a missing key are dropped because
+    # a merge would otherwise match missing keys with each other.
+    lookup = (
+        df[['Country', 'Year', value_col]]
+        .dropna(subset=['Country', 'Year'])
+        .drop_duplicates(subset=['Country', 'Year'], keep='first')
+    )
+
+    # Shift the lookup forward by `lag` years: the value observed in year Y is the lagged value of year Y + lag
+    lookup = pd.DataFrame({
+        'Country': lookup['Country'].to_numpy(),
+        'Year': (lookup['Year'] + lag).to_numpy(),
+        new_col_name: lookup[value_col].to_numpy(),
+    })
+
+    # A left merge keeps one row per input row, in the input order, so the result can be assigned
+    # back by position whatever the index of df is
+    matched = result[['Country', 'Year']].merge(lookup, on=['Country', 'Year'], how='left')
+    result[new_col_name] = matched[new_col_name].to_numpy()
+    return result
diff --git a/code/utils.py b/code/utils.py
diff --git a/config.yaml b/config.yaml
@@ -9,12 +9,10 @@ CPIS_2021_shp_path: 'data/raw/CPIS/World_CPIS_2021.shp' # from https://github.co
 Africa_countries_shp_path: 'data/raw/Africa_Boundaries-shp/Africa_Boundaries.shp' # from https://hub.arcgis.com/datasets/07610d73964e4d39ab62c4245d548625/explore
 Africa_CPIS_2000_shp_path: 'data/raw/CPIS/Africa_CPIS_2000.shp' # same as CPIS_2000_shp_path but filtered to Africa
 Africa_CPIS_2021_shp_path: 'data/raw/CPIS/Africa_CPIS_2021.shp' # same as CPIS_2021_shp_path but filtered to Africa
-government_effectiveness_path: 'data/raw/government_effectiveness/government_effectiveness.csv' # from https://data.worldbank.org/indicator/GE.EST. Renamed and header removed
+Africa_CPIS_shp_path: 'data/raw/CPIS/Africa_CPIS.shp' # merged Africa_CPIS_2000_shp_path and Africa_CPIS_2021_shp_path
+government_effectiveness_path: 'data/raw/government_effectiveness/government_effectiveness.csv' # from https://data.worldbank.org/indicator/GE.EST. Renamed, header removed, empty and irrelevant columns (e.g. what the value is) removed.
 SPEI_12_path: 'data/raw/SPEI_drought/spei12.nc' # from https://data.ceda.ac.uk/neodc/spei_africa/data. This one is at a 12 month scale.
 
 # processed data
-
-# outputs
-CPIS_change_country_map_path: 'outputs/CPIS_change_country_map.pdf'
-CPIS_2000_country_map_path: 'outputs/CPIS_2000_country_map.pdf'
-CPIS_2021_country_map_path: 'outputs/CPIS_2021_country_map.pdf'
+country_CPIS_count: 'data/processed/country_CPIS_count.csv' # number of CPIS points per country
+Africa_CPIS_GEI_shp_path: 'data/processed/CPIS/Africa_CPIS_GEI.shp' # Africa_CPIS_shp_path with government effectiveness data
diff --git a/output/0_process_data/0_subset_CPIS/CPIS_2000_2021_Africa.png b/output/0_process_data/0_subset_CPIS/CPIS_2000_2021_Africa.png
diff --git a/output/0_process_data/0_subset_CPIS/CPIS_2000_Africa.png b/output/0_process_data/0_subset_CPIS/CPIS_2000_Africa.png
diff --git a/output/0_process_data/0_subset_CPIS/CPIS_2021_Africa.png b/output/0_process_data/0_subset_CPIS/CPIS_2021_Africa.png
diff --git a/output/0_process_data/2_resample_CPIS/CPIS_2000_country_map.png b/output/0_process_data/2_resample_CPIS/CPIS_2000_country_map.png
diff --git a/output/0_process_data/2_resample_CPIS/CPIS_change_country_map.png b/output/0_process_data/2_resample_CPIS/CPIS_change_country_map.png