1
- import matplotlib .colors as colors
2
- import matplotlib .pyplot as plt
3
1
import numpy as np
4
2
import pandas as pd
5
3
from scipy .stats import norm
6
4
7
5
from ahah .common .utils import Config , combine_lsoa
8
6
9
7
10
- def exp_default (x , df ):
11
- return norm .ppf ((x - 0.5 ) / len (df ))
12
-
13
-
14
8
def exp_trans(x, df, *, scale=23, upper=100):
    """Map ranks onto an exponentially skewed score between 0 and ``upper``.

    The rank ``x`` is divided by ``len(df)`` to give a scaled rank ``r`` and
    then transformed as ``-scale * log(1 - r * (1 - exp(-upper / scale)))``.
    A scaled rank of 0 maps to 0 and a scaled rank of 1 (``x == len(df)``)
    maps to ``upper``; values in between grow faster towards the top end.

    Args:
        x: Rank, or array-like of ranks, supporting NumPy arithmetic.
        df: Any sized object; only ``len(df)`` is used, as the denominator
            that scales the ranks.
        scale: Shape constant of the transform. Defaults to 23, the value
            that was previously hard-coded.
        upper: Score produced when ``x == len(df)``. Defaults to 100, the
            value that was previously hard-coded.

    Returns:
        The transformed score(s), as produced by NumPy arithmetic on ``x``.
    """
    scaled_rank = x / len(df)
    # Chosen so that a scaled rank of exactly 1 lands on ``upper``.
    span = 1 - np.exp(-upper / scale)
    return -scale * np.log(1 - scaled_rank * span)
16
10
17
11
12
def exp_default(x, df):
    """Convert ranks into standard-normal quantiles.

    Each rank is shifted down by 0.5 and divided by ``len(df)`` to give a
    probability, which is then passed through the inverse normal CDF
    (``norm.ppf``). Despite the ``exp_`` prefix, no exponential is applied
    here.

    Args:
        x: Rank, or array-like of ranks, supporting NumPy arithmetic.
        df: Any sized object; only ``len(df)`` is used, as the denominator.

    Returns:
        The value(s) returned by ``norm.ppf`` for the derived probabilities.
    """
    n_rows = len(df)
    probability = (x - 0.5) / n_rows
    return norm.ppf(probability)
14
+
15
+
18
16
def read_v3 ():
19
17
v3 = pd .read_csv ("./data/out/weighted_mean_dists.csv" )
20
18
v3_secure = pd .read_csv (
@@ -31,20 +29,37 @@ def read_v2():
31
29
32
30
33
31
def process (idx , low_dist , env_dist , air_qual , high_dist ):
34
- idx [low_dist ] = idx [low_dist ].rank (method = "min" ).astype (int )
35
- idx [env_dist ] = idx [env_dist ].rank (method = "min" ).astype (int )
36
- idx [air_qual ] = idx [air_qual ].rank (method = "min" ).astype (int )
37
- idx [high_dist ] = idx [high_dist ].rank (method = "min" , ascending = False ).astype (int )
38
-
39
- idx [low_dist + env_dist + air_qual + high_dist ] = exp_default (
40
- idx [low_dist + env_dist + air_qual + high_dist ],
41
- idx ,
32
+ low_dist_ranked = [f"{ asset } _ranked" for asset in low_dist ]
33
+ env_dist_ranked = [f"{ asset } _ranked" for asset in env_dist ]
34
+ air_qual_ranked = [f"{ asset } _ranked" for asset in air_qual ]
35
+ high_dist_ranked = [f"{ asset } _ranked" for asset in high_dist ]
36
+
37
+ low_dist_expd = [f"{ asset } _expd" for asset in low_dist ]
38
+ env_dist_expd = [f"{ asset } _expd" for asset in env_dist ]
39
+ air_qual_expd = [f"{ asset } _expd" for asset in air_qual ]
40
+ high_dist_expd = [f"{ asset } _expd" for asset in high_dist ]
41
+
42
+ idx [low_dist_ranked ] = idx [low_dist ].rank (method = "min" ).astype (int )
43
+ idx [env_dist_ranked ] = idx [env_dist ].rank (method = "min" ).astype (int )
44
+ idx [air_qual_ranked ] = idx [air_qual ].rank (method = "min" ).astype (int )
45
+ idx [high_dist_ranked ] = (
46
+ idx [high_dist ].rank (method = "min" , ascending = False ).astype (int )
47
+ )
48
+
49
+ # higher values of gspassive are better (prop of pc that is gs)
50
+ idx [env_dist_ranked [1 ]] = (
51
+ idx [env_dist [1 ]].rank (method = "min" , ascending = False ).astype (int )
42
52
)
43
53
44
- idx ["r_domain" ] = idx [high_dist ].mean (axis = 1 )
45
- idx ["h_domain" ] = idx [low_dist ].mean (axis = 1 )
46
- idx ["g_domain" ] = idx [env_dist ].mean (axis = 1 )
47
- idx ["e_domain" ] = idx [air_qual ].mean (axis = 1 )
54
+ idx [low_dist_expd ] = exp_default (idx [low_dist_ranked ], idx )
55
+ idx [env_dist_expd ] = exp_default (idx [env_dist_ranked ], idx )
56
+ idx [air_qual_expd ] = exp_default (idx [air_qual_ranked ], idx )
57
+ idx [high_dist_expd ] = exp_default (idx [high_dist_ranked ], idx )
58
+
59
+ idx ["h_domain" ] = idx [low_dist_expd ].mean (axis = 1 )
60
+ idx ["g_domain" ] = idx [env_dist_expd ].mean (axis = 1 )
61
+ idx ["e_domain" ] = idx [air_qual_expd ].mean (axis = 1 )
62
+ idx ["r_domain" ] = idx [high_dist_expd ].mean (axis = 1 )
48
63
49
64
idx ["r_rank" ] = idx ["r_domain" ].rank (method = "min" ).astype (int )
50
65
idx ["h_rank" ] = idx ["h_domain" ].rank (method = "min" ).astype (int )
@@ -57,61 +72,34 @@ def process(idx, low_dist, env_dist, air_qual, high_dist):
57
72
idx ["e_exp" ] = exp_trans (idx ["e_rank" ], idx )
58
73
59
74
idx ["ahah" ] = idx [["r_exp" , "h_exp" , "g_exp" , "e_exp" ]].mean (axis = 1 )
75
+ idx ["r_ahah" ] = idx ["ahah" ].rank (method = "min" ).astype (int )
76
+ idx ["d_ahah" ] = pd .qcut (idx ["r_ahah" ], 10 , labels = False )
60
77
return idx
61
78
62
79
63
- low_dist = ["gpp" , "dentists" , "pharmacies" , "hospitals" , "leisure" ]
64
- env_dist = ["greenspace" , "gspassive" , "bluespace" ]
65
- air_qual = ["no22019" , "so22019" , "pm102019g" ]
66
- high_dist = ["gambling" , "offlicences" , "pubs" , "tobacconists" , "fastfood" ]
67
- v3 = read_v3 ().dropna ()
68
- v3 = process (v3 , low_dist , env_dist , air_qual , high_dist )
69
-
70
- low_dist = ["gpp_dist" , "ed_dist" , "dent_dist" , "pharm_dist" , "leis_dist" ]
71
- env_dist = ["green_act" , "green_pas" , "blue_dist" ]
72
- air_qual = ["no2_mean" , "pm10_mean" , "so2_mean" ]
73
- high_dist = ["gamb_dist" , "ffood_dist" , "pubs_dist" , "off_dist" , "tobac_dist" ]
74
- v2 = read_v2 ()
75
- v2 = process (v2 , low_dist , env_dist , air_qual , high_dist )
76
-
77
- lsoa = combine_lsoa (
78
- eng = Config .RAW_DATA / "lsoa" / "england_lsoa_2011.shp" ,
79
- scot = Config .RAW_DATA / "lsoa" / "SG_DataZone_Bdry_2011.shp" ,
80
- wales = Config .RAW_DATA / "lsoa" / "lsoa_wales_2011.gpkg" ,
81
- )
82
-
83
- v3 = lsoa .merge (v3 , on = "lsoa11" , how = "outer" )
84
- v2 = lsoa .merge (v2 , on = "lsoa11" , how = "outer" )
85
-
86
- v3 .to_file (Config .OUT_DATA / "v3_lsoa.gpkg" , driver = "GPKG" )
87
- v2 .to_file (Config .OUT_DATA / "v2_lsoa.gpkg" , driver = "GPKG" )
88
-
89
- ax = plt .figure ().subplots (1 , 2 )
90
- col = "ahah"
91
- v3 .plot (
92
- column = col ,
93
- legend = True ,
94
- cmap = "RdYlBu_r" ,
95
- norm = colors .TwoSlopeNorm (vcenter = 50 , vmin = 0 , vmax = 100 ),
96
- ax = ax [0 ],
97
- )
98
- col = "ahah"
99
- v2 .plot (
100
- column = col ,
101
- legend = True ,
102
- cmap = "RdYlBu_r" ,
103
- norm = colors .TwoSlopeNorm (vcenter = 50 , vmin = 0 , vmax = 100 ),
104
- ax = ax [1 ],
105
- )
106
- plt .show ()
107
-
108
- test = v3 .set_index ("lsoa11" ).join (v2 .set_index ("lsoa11" ), rsuffix = "_v2" )
109
- test ["diff" ] = test ["ahah" ] - test ["ahah_v2" ]
110
-
111
- test .plot (
112
- column = "diff" ,
113
- legend = True ,
114
- cmap = "RdYlBu_r" ,
115
- # norm=colors.TwoSlopeNorm(vcenter=50, vmin=0, vmax=100),
116
- )
117
- plt .show ()
80
if __name__ == "__main__":
    # Column groups for the v3 input, keyed by the ``process`` parameter
    # each group is passed to.
    v3_columns = {
        "low_dist": ["gpp", "dentists", "pharmacies", "hospitals", "leisure"],
        "env_dist": ["greenspace", "gspassive", "bluespace"],
        "air_qual": ["no22019", "so22019", "pm102019g"],
        "high_dist": [
            "gambling",
            "offlicences",
            "pubs",
            "tobacconists",
            "fastfood",
        ],
    }
    # Rows with missing values are dropped from v3 before processing.
    v3 = process(read_v3().dropna(), **v3_columns)

    # Same grouping for the v2 input, which uses different column names.
    v2_columns = {
        "low_dist": ["gpp_dist", "ed_dist", "dent_dist", "pharm_dist", "leis_dist"],
        "env_dist": ["green_act", "green_pas", "blue_dist"],
        "air_qual": ["no2_mean", "pm10_mean", "so2_mean"],
        "high_dist": [
            "gamb_dist",
            "ffood_dist",
            "pubs_dist",
            "off_dist",
            "tobac_dist",
        ],
    }
    v2 = process(read_v2(), **v2_columns)

    # Boundary files for the three nations all live in the same directory.
    lsoa_dir = Config.RAW_DATA / "lsoa"
    lsoa = combine_lsoa(
        eng=lsoa_dir / "england_lsoa_2011.shp",
        scot=lsoa_dir / "SG_DataZone_Bdry_2011.shp",
        wales=lsoa_dir / "lsoa_wales_2011.gpkg",
    )

    # Outer merges keep rows present on either side of the "lsoa11" key.
    v3 = lsoa.merge(v3, on="lsoa11", how="outer")
    v2 = lsoa.merge(v2, on="lsoa11", how="outer")

    # Write each merged result as a GeoPackage.
    v3.to_file(Config.OUT_DATA / "v3_lsoa.gpkg", driver="GPKG")
    v2.to_file(Config.OUT_DATA / "v2_lsoa.gpkg", driver="GPKG")
0 commit comments