-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f8fb74f
Showing
5 changed files
with
814 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
#!/usr/bin/env | ||
|
||
""" This file contains code for reading Direction of Trade data from the IMF into a weighted, directed network | ||
and using it to find trade communities """ | ||
|
||
import networkx as nx | ||
import pandas as pd | ||
import math | ||
from modularity_maximization import partition | ||
from modularity_maximization.utils import get_modularity | ||
import pickle | ||
|
||
|
||
def create_network_dict(df, years): | ||
""" | ||
Returns a dictionary of networks with the relevant years as keys. | ||
:param df: pandas dataframe of exports to use in creating networks | ||
:param years: iterable of integer years for which to create networks | ||
:return: dictionary of networkx graphs with integer years as keys | ||
""" | ||
|
||
networks = {} | ||
for year in years: | ||
print('Creating network for %d...' % year) | ||
networks[year] = create_dot_network(df, str(year)) | ||
|
||
return networks | ||
|
||
|
||
def create_dot_network(df, year): | ||
""" | ||
Returns networkx directed graph of international trade with country codes as nodes. | ||
:param df: pandas dataframe of trade exports with each row representing trade between 2 countries and columns for | ||
each year of data | ||
:param year: string year to create network for | ||
:return graph: newtorkx directed graph with exports in USD as edge weights | ||
""" | ||
|
||
# extract only relevant data from dataframe | ||
data = df[['Country Code', 'Counterpart Country Code', year]] | ||
|
||
# initialize networkx DiGraph | ||
network = nx.DiGraph() | ||
# add edge to graph for each row of data not equal to NaN | ||
for index, row in data.iterrows(): | ||
if not math.isnan(float(row[year])): | ||
network.add_edge(int(row['Country Code']), int(row['Counterpart Country Code']), | ||
weight=row[year]) | ||
|
||
return network | ||
|
||
|
||
def extract_relevant_rows(df, column_name, column_value, not_equal=False): | ||
""" | ||
Returns pandas dataframe consisting only of rows with specific values in a specific column. | ||
:param df: pandas dataframe to extract rows from | ||
:param column_name: name of column requiring specific value | ||
:param column_value: value required in column | ||
:param not_equal: boolean for whether to return rows equal to passed in values (False) or not | ||
equal to passed in values (True) | ||
:return: pandas dataframe consisting only of desired rows | ||
""" | ||
|
||
if not_equal: | ||
return df.loc[df[column_name] != column_value] | ||
|
||
return df.loc[df[column_name] == column_value] | ||
|
||
|
||
def prepare_data(filename='data/DOT_timeSeries.csv'): | ||
""" | ||
Reads in DOT datafile and filters for relevant information. | ||
:param filename: string path to csv file | ||
:return: pandas dataframe constructed from datafile and filtered for relevant rows | ||
""" | ||
|
||
# read data file into pandas dataframe | ||
df = pd.read_csv(filename) | ||
|
||
# extract unwanted 'countries' from dataframe | ||
countries = ['Europe', 'Emerging and Developing Europe', 'Emerging and Developing Asia', | ||
'Middle East, North Africa, and Pakistan', 'Export earnings: nonfuel', | ||
'Sub-Saharan Africa', 'Export earnings: fuel', 'Western Hemisphere', | ||
'World', 'Special Categories', 'Advanced Economies', 'CIS', | ||
'Emerging and Developing Economies'] | ||
for country in countries: | ||
df = extract_relevant_rows(df, column_name='Country Name', column_value=country, not_equal=True) | ||
df = extract_relevant_rows(df, column_name='Counterpart Country Name', column_value=country, not_equal=True) | ||
|
||
# extract exports only from data | ||
exports = extract_relevant_rows(df, column_name='Indicator Code', column_value='TXG_FOB_USD') | ||
# extract value attributes only from exports | ||
export_values = extract_relevant_rows(exports, column_name='Attribute', column_value='Value') | ||
|
||
return export_values | ||
|
||
|
||
def create_country_code_dict(df): | ||
""" | ||
Creates a dictionary of country names with country codes as keys from the passed in dataframe. | ||
:param df: pandas dataframe from which to extract country codes & names | ||
:return: dictionary with country codes as keys and country names as values | ||
""" | ||
|
||
code_dict = {} | ||
|
||
# check both country and counterpart country columns for unique country codes | ||
for col in ['Country', 'Counterpart Country']: | ||
for code in df[col + ' Code'].unique(): | ||
code_dict[int(code)] = df.loc[df[col + ' Code'] == code][col + ' Name'].values[0] | ||
|
||
return code_dict | ||
|
||
|
||
def find_and_print_network_communities(G, code_dict=None): | ||
""" | ||
Finds network communities through modularity maximization and returns dictionary of community | ||
members by country name with community numbers as keys. | ||
:param G: networkx Graph to find communities in | ||
:param code_dict: dictionary mapping country codes to names - if passed in, will use mappings for | ||
recording community members | ||
:return: 1. dictionary with community numbers as keys and list of string country names as values | ||
2. modularity of discovered community partitions | ||
""" | ||
|
||
comm_dict = partition(G) | ||
|
||
comm_members = {} | ||
for comm in set(comm_dict.values()): | ||
countries = [node for node in comm_dict if comm_dict[node] == comm] | ||
if code_dict is not None: | ||
countries = [code_dict[code] for code in countries] | ||
|
||
comm_members[comm] = countries | ||
|
||
return comm_members, get_modularity(G, comm_dict) | ||
|
||
|
||
def get_network_info_dict(network): | ||
""" | ||
Returns dictionary of network characteristics obtained from networkx.info method. | ||
:param network: network to get info on | ||
:return: dictionary mapping network characteristic name to value | ||
""" | ||
info_str = nx.info(network) | ||
lines = info_str.split('\n') | ||
|
||
info_dict = {} | ||
for line in lines: | ||
pair = line.split(':') | ||
info_dict[pair[0]] = pair[1].strip() | ||
|
||
return info_dict | ||
|
||
|
||
def save_all_community_information(networks, code_dict=None, filename='data/communities.pkl'): | ||
""" | ||
Finds communities in each network and saves modularity, network info, and community members to file. | ||
:param networks: dictionary mapping integer years to networks | ||
:param code_dict: dictionary mapping country codes to names - if passed in, will use mappings for | ||
recording community members | ||
:param filename: string name, including extension, of file to save info to | ||
:return: nothing, saves network info to 'communities.pkl' | ||
""" | ||
|
||
save_dict = {} | ||
for year, network in networks.items(): | ||
print('Finding communities for %d network...' % year) | ||
comms, mod = find_and_print_network_communities(network, code_dict) | ||
info_dict = get_network_info_dict(network) | ||
comm_dict = {'modularity': mod, | ||
'communities': comms} | ||
save_dict[year] = {**info_dict, **comm_dict} | ||
|
||
with open(filename, 'wb') as f: | ||
pickle.dump(save_dict, f, pickle.HIGHEST_PROTOCOL) | ||
|
||
|
||
def main(): | ||
# clean data & create country code dictionary | ||
data = prepare_data() | ||
country_dict = create_country_code_dict(data) | ||
|
||
# create dictionary of networks with keys as years | ||
networks = create_network_dict(data, years=range(1948, 2018)) | ||
|
||
# save community info for all networks | ||
save_all_community_information(networks, code_dict=country_dict) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
#!/usr/bin/env | ||
|
||
""" This file contains tests for the dot_network.py file that creates DOT networks """ | ||
|
||
import pandas as pd | ||
import dot_network as dot | ||
import networkx as nx | ||
import pickle | ||
|
||
|
||
class test_dot_network: | ||
|
||
def setup(self): | ||
""" Setup method creates the test csv file and writes to data/test_DOT_files.csv """ | ||
# extract test data from DOT data frame | ||
df = pd.read_csv('data/DOT_timeSeries.csv') | ||
test_df = df.loc[(df['Country Name'] == 'Angola') & (df['Counterpart Country Name'] == 'Colombia')] | ||
test_df = test_df.append(df.loc[(df['Country Name'] == 'Angola') & | ||
(df['Counterpart Country Name'] == 'Moldova')]) | ||
test_df = test_df.append(df.loc[(df['Country Name'] == 'Moldova') & | ||
(df['Counterpart Country Name'] == 'Angola')]) | ||
test_df = test_df.append(df.loc[(df['Country Name'] == 'World') & | ||
(df['Counterpart Country Name'] == 'Moldova')]) | ||
|
||
self.filename = 'data/test_DOT_file.csv' | ||
self.test_data = test_df | ||
self.filtered_data = dot.prepare_data(self.filename) | ||
self.code_dict = dot.create_country_code_dict(self.filtered_data) | ||
self.network = dot.create_dot_network(self.filtered_data, '2007') | ||
self.network_dict = dot.create_network_dict(self.filtered_data, range(2007, 2010)) | ||
|
||
# save test dataframe to file | ||
test_df.to_csv(self.filename) | ||
|
||
def test_extract_relevant_rows(self): | ||
""" Tests that extract_relevant_rows only returns the relevant rows """ | ||
df = dot.extract_relevant_rows(self.test_data, | ||
column_name='Country Name', | ||
column_value='Angola') | ||
assert (df['Country Name'] == 'Angola').all() | ||
|
||
def test_extract_relevant_rows_not_equal(self): | ||
""" Tests that extract_relevant_rows filters out undesired rows when not_equal=True""" | ||
df = dot.extract_relevant_rows(self.test_data, | ||
column_name='Country Name', | ||
column_value='Angola', | ||
not_equal=True) | ||
assert not (df['Country Name'] == 'Angola').any() | ||
|
||
def test_prepare_data(self): | ||
""" Tests the prepare_data function by asserting that only relevant rows are returned """ | ||
df = self.filtered_data | ||
assert len(df) == 3 | ||
assert ((df['Country Name'] == 'Angola') & (df['Counterpart Country Name'] == 'Colombia')).any() | ||
assert ((df['Country Name'] == 'Angola') & (df['Counterpart Country Name'] == 'Moldova')).any() | ||
assert ((df['Country Name'] == 'Moldova') & (df['Counterpart Country Name'] == 'Angola')).any() | ||
|
||
def test_create_dot_network(self): | ||
""" Tests that create_dot_network returns the correct network """ | ||
assert list(self.network.edges.data()) == [(614.0, 233.0, {'weight': 73172520.0}), | ||
(921.0, 614.0, {'weight': 263001.0})] | ||
|
||
def test_create_network_dict(self): | ||
""" Tests that create_network_dict returns a dictionary of networks """ | ||
assert [type(self.network_dict[year]) == nx.DiGraph for year in range(2007, 2010)] | ||
|
||
def test_code_dict_creation(self): | ||
""" Tests that code dict created is correct """ | ||
assert self.code_dict == {921: 'Moldova', 614: 'Angola', 233: 'Colombia'} | ||
|
||
def test_community_finding(self): | ||
""" Tests that network community finding function is working properly """ | ||
comm, mod = dot.find_and_print_network_communities(self.network, code_dict=self.code_dict) | ||
assert comm == {0: ['Colombia', 'Angola', 'Moldova']} | ||
assert mod == 0.0 | ||
|
||
def test_network_info_saving(self): | ||
""" Tests that network community info is correctly saved to file """ | ||
dot.save_all_community_information(self.network_dict, code_dict=self.code_dict, filename='data/test.pkl') | ||
with open('data/test.pkl', 'rb') as f: | ||
loaded = pickle.load(f) | ||
|
||
assert loaded == {2008: {'communities': {0: ['Moldova', 'Angola']}, | ||
'Average in degree': '1.0000', | ||
'Type': 'DiGraph', | ||
'Number of edges': '2', | ||
'Name': '', | ||
'Number of nodes': '2', | ||
'Average out degree': '1.0000', | ||
'modularity': 0.0}, | ||
2009: {'communities': {0: ['Moldova', 'Angola']}, | ||
'Average in degree': '0.5000', | ||
'Type': 'DiGraph', | ||
'Number of edges': '1', | ||
'Name': '', | ||
'Number of nodes': '2', | ||
'Average out degree': '0.5000', | ||
'modularity': 0.0}, | ||
2007: {'communities': {0: ['Colombia', 'Angola', 'Moldova']}, | ||
'Average in degree': '0.6667', | ||
'Type': 'DiGraph', | ||
'Number of edges': '2', | ||
'Name': '', | ||
'Number of nodes': '3', | ||
'Average out degree': '0.6667', | ||
'modularity': 0.0}} |
Oops, something went wrong.