Skip to content

Commit 360d263

Browse files
committed
add welsh pharms
1 parent afe3500 commit 360d263

File tree

5 files changed

+98
-77
lines changed

5 files changed

+98
-77
lines changed

ahah/common/utils.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,17 @@ class Config:
6868
"/d08bc753-c6dc-4dbd-8b37-ef439d3a7428/download"
6969
"/dispenser_contactdetails_oct2020_notabs.csv",
7070
}
71+
NHS_WALES_URL = (
72+
"https://nwssp.nhs.wales/ourservices/"
73+
"primary-care-services/primary-care-services-documents/"
74+
)
75+
76+
NHS_WALES_FILES = {
77+
"pharmacy": (
78+
"pharmacy-practice-dispensing-data-docs"
79+
"/dispensing-data-report-november-2021"
80+
)
81+
}
7182

7283

7384
def combine_lsoa(eng, scot, wales):
@@ -246,7 +257,7 @@ def clean_gpp(
246257

247258

248259
def clean_pharmacies(
249-
england: Path, scotland: Path, postcodes: cudf.DataFrame
260+
england: Path, scotland: Path, wales: Path, postcodes: cudf.DataFrame
250261
) -> cudf.DataFrame:
251262
logger.info("Cleaning pharmacies...")
252263

@@ -270,7 +281,18 @@ def clean_pharmacies(
270281
.join(postcodes)
271282
.pipe(find_partial_pc, postcodes)
272283
)
273-
return epharm.append(spharm).reset_index()
284+
285+
wpharm = (
286+
cudf.from_pandas(pd.read_excel(wales, usecols=["Account Number", "Post Code"]))
287+
.rename(columns={"Account Number": "pharmacy", "Post Code": "postcode"})
288+
.astype(str)
289+
.pipe(fix_postcodes)
290+
.set_index("postcode")
291+
.join(postcodes)
292+
.pipe(find_partial_pc, postcodes)
293+
)
294+
breakpoint()
295+
return epharm.append(spharm).append(wpharm).reset_index()
274296

275297

276298
def clean_hospitals(

ahah/create_index.py

+61-73
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,18 @@
1-
import matplotlib.colors as colors
2-
import matplotlib.pyplot as plt
31
import numpy as np
42
import pandas as pd
53
from scipy.stats import norm
64

75
from ahah.common.utils import Config, combine_lsoa
86

97

10-
def exp_default(x, df):
11-
return norm.ppf((x - 0.5) / len(df))
12-
13-
148
def exp_trans(x, df):
159
return -23 * np.log(1 - (x / len(df)) * (1 - np.exp(-100 / 23)))
1610

1711

12+
def exp_default(x, df):
13+
return norm.ppf((x - 0.5) / len(df))
14+
15+
1816
def read_v3():
1917
v3 = pd.read_csv("./data/out/weighted_mean_dists.csv")
2018
v3_secure = pd.read_csv(
@@ -31,20 +29,37 @@ def read_v2():
3129

3230

3331
def process(idx, low_dist, env_dist, air_qual, high_dist):
34-
idx[low_dist] = idx[low_dist].rank(method="min").astype(int)
35-
idx[env_dist] = idx[env_dist].rank(method="min").astype(int)
36-
idx[air_qual] = idx[air_qual].rank(method="min").astype(int)
37-
idx[high_dist] = idx[high_dist].rank(method="min", ascending=False).astype(int)
38-
39-
idx[low_dist + env_dist + air_qual + high_dist] = exp_default(
40-
idx[low_dist + env_dist + air_qual + high_dist],
41-
idx,
32+
low_dist_ranked = [f"{asset}_ranked" for asset in low_dist]
33+
env_dist_ranked = [f"{asset}_ranked" for asset in env_dist]
34+
air_qual_ranked = [f"{asset}_ranked" for asset in air_qual]
35+
high_dist_ranked = [f"{asset}_ranked" for asset in high_dist]
36+
37+
low_dist_expd = [f"{asset}_expd" for asset in low_dist]
38+
env_dist_expd = [f"{asset}_expd" for asset in env_dist]
39+
air_qual_expd = [f"{asset}_expd" for asset in air_qual]
40+
high_dist_expd = [f"{asset}_expd" for asset in high_dist]
41+
42+
idx[low_dist_ranked] = idx[low_dist].rank(method="min").astype(int)
43+
idx[env_dist_ranked] = idx[env_dist].rank(method="min").astype(int)
44+
idx[air_qual_ranked] = idx[air_qual].rank(method="min").astype(int)
45+
idx[high_dist_ranked] = (
46+
idx[high_dist].rank(method="min", ascending=False).astype(int)
47+
)
48+
49+
# higher values of gspassive are better (prop of pc that is gs)
50+
idx[env_dist_ranked[1]] = (
51+
idx[env_dist[1]].rank(method="min", ascending=False).astype(int)
4252
)
4353

44-
idx["r_domain"] = idx[high_dist].mean(axis=1)
45-
idx["h_domain"] = idx[low_dist].mean(axis=1)
46-
idx["g_domain"] = idx[env_dist].mean(axis=1)
47-
idx["e_domain"] = idx[air_qual].mean(axis=1)
54+
idx[low_dist_expd] = exp_default(idx[low_dist_ranked], idx)
55+
idx[env_dist_expd] = exp_default(idx[env_dist_ranked], idx)
56+
idx[air_qual_expd] = exp_default(idx[air_qual_ranked], idx)
57+
idx[high_dist_expd] = exp_default(idx[high_dist_ranked], idx)
58+
59+
idx["h_domain"] = idx[low_dist_expd].mean(axis=1)
60+
idx["g_domain"] = idx[env_dist_expd].mean(axis=1)
61+
idx["e_domain"] = idx[air_qual_expd].mean(axis=1)
62+
idx["r_domain"] = idx[high_dist_expd].mean(axis=1)
4863

4964
idx["r_rank"] = idx["r_domain"].rank(method="min").astype(int)
5065
idx["h_rank"] = idx["h_domain"].rank(method="min").astype(int)
@@ -57,61 +72,34 @@ def process(idx, low_dist, env_dist, air_qual, high_dist):
5772
idx["e_exp"] = exp_trans(idx["e_rank"], idx)
5873

5974
idx["ahah"] = idx[["r_exp", "h_exp", "g_exp", "e_exp"]].mean(axis=1)
75+
idx["r_ahah"] = idx["ahah"].rank(method="min").astype(int)
76+
idx["d_ahah"] = pd.qcut(idx["r_ahah"], 10, labels=False)
6077
return idx
6178

6279

63-
low_dist = ["gpp", "dentists", "pharmacies", "hospitals", "leisure"]
64-
env_dist = ["greenspace", "gspassive", "bluespace"]
65-
air_qual = ["no22019", "so22019", "pm102019g"]
66-
high_dist = ["gambling", "offlicences", "pubs", "tobacconists", "fastfood"]
67-
v3 = read_v3().dropna()
68-
v3 = process(v3, low_dist, env_dist, air_qual, high_dist)
69-
70-
low_dist = ["gpp_dist", "ed_dist", "dent_dist", "pharm_dist", "leis_dist"]
71-
env_dist = ["green_act", "green_pas", "blue_dist"]
72-
air_qual = ["no2_mean", "pm10_mean", "so2_mean"]
73-
high_dist = ["gamb_dist", "ffood_dist", "pubs_dist", "off_dist", "tobac_dist"]
74-
v2 = read_v2()
75-
v2 = process(v2, low_dist, env_dist, air_qual, high_dist)
76-
77-
lsoa = combine_lsoa(
78-
eng=Config.RAW_DATA / "lsoa" / "england_lsoa_2011.shp",
79-
scot=Config.RAW_DATA / "lsoa" / "SG_DataZone_Bdry_2011.shp",
80-
wales=Config.RAW_DATA / "lsoa" / "lsoa_wales_2011.gpkg",
81-
)
82-
83-
v3 = lsoa.merge(v3, on="lsoa11", how="outer")
84-
v2 = lsoa.merge(v2, on="lsoa11", how="outer")
85-
86-
v3.to_file(Config.OUT_DATA / "v3_lsoa.gpkg", driver="GPKG")
87-
v2.to_file(Config.OUT_DATA / "v2_lsoa.gpkg", driver="GPKG")
88-
89-
ax = plt.figure().subplots(1, 2)
90-
col = "ahah"
91-
v3.plot(
92-
column=col,
93-
legend=True,
94-
cmap="RdYlBu_r",
95-
norm=colors.TwoSlopeNorm(vcenter=50, vmin=0, vmax=100),
96-
ax=ax[0],
97-
)
98-
col = "ahah"
99-
v2.plot(
100-
column=col,
101-
legend=True,
102-
cmap="RdYlBu_r",
103-
norm=colors.TwoSlopeNorm(vcenter=50, vmin=0, vmax=100),
104-
ax=ax[1],
105-
)
106-
plt.show()
107-
108-
test = v3.set_index("lsoa11").join(v2.set_index("lsoa11"), rsuffix="_v2")
109-
test["diff"] = test["ahah"] - test["ahah_v2"]
110-
111-
test.plot(
112-
column="diff",
113-
legend=True,
114-
cmap="RdYlBu_r",
115-
# norm=colors.TwoSlopeNorm(vcenter=50, vmin=0, vmax=100),
116-
)
117-
plt.show()
80+
if __name__ == "__main__":
81+
low_dist = ["gpp", "dentists", "pharmacies", "hospitals", "leisure"]
82+
env_dist = ["greenspace", "gspassive", "bluespace"]
83+
air_qual = ["no22019", "so22019", "pm102019g"]
84+
high_dist = ["gambling", "offlicences", "pubs", "tobacconists", "fastfood"]
85+
v3 = read_v3().dropna()
86+
v3 = process(v3, low_dist, env_dist, air_qual, high_dist)
87+
88+
low_dist = ["gpp_dist", "ed_dist", "dent_dist", "pharm_dist", "leis_dist"]
89+
env_dist = ["green_act", "green_pas", "blue_dist"]
90+
air_qual = ["no2_mean", "pm10_mean", "so2_mean"]
91+
high_dist = ["gamb_dist", "ffood_dist", "pubs_dist", "off_dist", "tobac_dist"]
92+
v2 = read_v2()
93+
v2 = process(v2, low_dist, env_dist, air_qual, high_dist)
94+
95+
lsoa = combine_lsoa(
96+
eng=Config.RAW_DATA / "lsoa" / "england_lsoa_2011.shp",
97+
scot=Config.RAW_DATA / "lsoa" / "SG_DataZone_Bdry_2011.shp",
98+
wales=Config.RAW_DATA / "lsoa" / "lsoa_wales_2011.gpkg",
99+
)
100+
101+
v3 = lsoa.merge(v3, on="lsoa11", how="outer")
102+
v2 = lsoa.merge(v2, on="lsoa11", how="outer")
103+
104+
v3.to_file(Config.OUT_DATA / "v3_lsoa.gpkg", driver="GPKG")
105+
v2.to_file(Config.OUT_DATA / "v2_lsoa.gpkg", driver="GPKG")

ahah/get_nhs.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
from pathlib import Path
2+
from zipfile import ZipFile
3+
14
import requests
5+
26
from ahah.common.logger import logger
37
from ahah.common.utils import Config
4-
from pathlib import Path
5-
from zipfile import ZipFile
68

79

810
def download_url(url: str, save_path: Path, chunk_size: int = 128):
@@ -46,3 +48,10 @@ def download_url(url: str, save_path: Path, chunk_size: int = 128):
4648
logger.debug(f"{Config.NHS_SCOT_URL + url} saved to {file}")
4749
else:
4850
logger.warning(f"{file} exists: skipping {Config.NHS_SCOT_URL + url}")
51+
52+
for name, url in Config.NHS_WALES_FILES.items():
53+
file = Config.RAW_DATA / "nhs" / "wales" / f"{name}.xls"
54+
if not file.exists():
55+
download_url(Config.NHS_WALES_URL + url, save_path=file)
56+
else:
57+
logger.warning(f"{file} exists: skipping {Config.NHS_WALES_URL + url}")

ahah/process_routing.py

+1
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ def get_buffers(
142142
pharmacies: cudf.DataFrame = clean_pharmacies(
143143
england=Config.RAW_DATA / "nhs" / "edispensary.csv",
144144
scotland=Config.RAW_DATA / "nhs" / "scotland" / "pharmacies.csv",
145+
wales=Config.RAW_DATA / "nhs" / "wales" / "pharmacy.xls",
145146
postcodes=pcs,
146147
)
147148
greenspace: cudf.DataFrame = clean_greenspace_access(

env.yml

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies:
1313
- pytables
1414
- pdoc3
1515
- openpyxl
16+
- xlrd
1617
- isort
1718
- flake8
1819
- black

0 commit comments

Comments
 (0)