Skip to content

Commit 7e125bc

Browse files
committed
use os open roads
1 parent e434bb7 commit 7e125bc

File tree

3 files changed

+154
-131
lines changed

3 files changed

+154
-131
lines changed

ahah/os_highways.py

+74-73
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
11
import cudf
22
import geopandas as gpd
3-
43
import pandas as pd
54
from cuml.neighbors.nearest_neighbors import NearestNeighbors
65
from pandas import IndexSlice as idx
7-
from rich.progress import track
86

97
from ahah.common.logger import logger
108
from ahah.common.utils import Config
@@ -31,32 +29,17 @@ def process_edges(edges: pd.DataFrame) -> pd.DataFrame:
3129
b_roads = ["B Road", "B Road Primary"]
3230

3331
edges["speed_estimate"] = -1
34-
edges = edges.set_index(["formOfWay", "routeHierarchy"])
32+
edges = edges.set_index(["formOfWay", "roadClassification"])
3533

3634
edges.loc[idx[:, "Motorway"], "speed_estimate"] = 67
3735
edges.loc[idx["Dual Carriageway", a_roads], "speed_estimate"] = 57
3836
edges.loc[idx["Dual Carriageway", b_roads], "speed_estimate"] = 45
3937
edges.loc[idx["Single Carriageway", a_roads + b_roads], "speed_estimate"] = 25
40-
edges.loc[idx[:, "Minor Road"], "speed_estimate"] = 24
41-
edges.loc[idx[:, "Local Road"], "speed_estimate"] = 20
38+
edges.loc[idx[:, "Unclassified"], "speed_estimate"] = 24
4239
edges.loc[idx["Roundabout", :], "speed_estimate"] = 10
4340
edges.loc[idx[["Track", "Layby"], :], "speed_estimate"] = 5
4441
edges.loc[edges["speed_estimate"] == -1, "speed_estimate"] = 10
4542

46-
# Unsure what to keep
47-
# edges = edges.drop(
48-
# index="Traffic Island Link At Junction", level=0, errors="ignore"
49-
# )
50-
# edges = edges.drop(index="Traffic Island Link", level=0, errors="ignore")
51-
# edges = edges.drop(index="Enclosed Traffic Area", level=0, errors="ignore")
52-
# edges = edges.drop(index="Layby", level=0, errors="ignore")
53-
# edges = edges.drop(index="Track", level=0, errors="ignore")
54-
# edges = edges.drop(index="Guided Busway", level=0, errors="ignore")
55-
# edges = edges.drop(index="Restricted Local Access Road", level=1, errors="ignore")
56-
# edges = edges.drop(
57-
# index="Restricted Secondary Access Road", level=1, errors="ignore"
58-
# )
59-
6043
edges = edges.assign(
6144
speed_estimate=edges["speed_estimate"] * 1.609344,
6245
time_weighted=(edges["length"].astype(float) / 1000)
@@ -67,95 +50,113 @@ def process_edges(edges: pd.DataFrame) -> pd.DataFrame:
6750
return edges[["startNode", "endNode", "time_weighted", "length"]]
6851

6952

70-
def change_ferry_nodes(nodes, fnodes, fedges):
53+
def process_ferry(ferry_df):
    """Build ferry node and edge tables from ferry line geometries.

    The boundary points of every ferry line become nodes, identified by the
    string form of the point geometry. Each line becomes one edge running
    from its first boundary point to its second.

    Args:
        ferry_df: GeoDataFrame with a ``geometry`` column of ferry lines.
            It is left unmodified.
            Assumes every geometry has at least two boundary points (i.e. no
            closed rings) -- TODO confirm against the input data.

    Returns:
        A ``(ferry_nodes, ferry_edges)`` tuple of cudf DataFrames.
        ``ferry_nodes`` has columns ``node_id``, ``easting``, ``northing``;
        ``ferry_edges`` has columns ``startNode``, ``endNode``, ``length``,
        ``time_weighted``.
    """
    # Work on a copy so the caller's frame does not gain a helper column.
    ferry_df = ferry_df.copy()
    # Materialise the boundary points as a plain tuple so they can be both
    # exploded into rows and indexed by position.
    ferry_df["node_id"] = ferry_df.geometry.boundary.apply(
        lambda boundary: tuple(boundary.geoms)
    )

    # One row per distinct boundary point.
    ferry_nodes = (
        ferry_df["node_id"]
        .explode()
        .drop_duplicates()
        .reset_index(drop=True)
        .to_frame()
    )
    ferry_nodes["easting"] = ferry_nodes["node_id"].apply(lambda point: point.x)
    ferry_nodes["northing"] = ferry_nodes["node_id"].apply(lambda point: point.y)
    # The point's string form doubles as the node identifier.
    ferry_nodes["node_id"] = ferry_nodes["node_id"].astype(str)

    ferry_edges = ferry_df[["node_id", "geometry"]].copy()
    ferry_edges["startNode"] = ferry_edges["node_id"].apply(lambda ends: ends[0])
    ferry_edges["endNode"] = ferry_edges["node_id"].apply(lambda ends: ends[1])
    ferry_edges["length"] = ferry_edges.geometry.length
    # NOTE(review): evaluated left to right this is (km / 25) * 1.609344 * 60,
    # not km / (25 * 1.609344) * 60. Kept numerically unchanged; confirm the
    # intended units before altering it.
    ferry_edges = ferry_edges.assign(
        time_weighted=(ferry_edges["length"].astype(float) / 1000) / 25 * 1.609344 * 60
    )
    # Use the same string identifiers as the node table.
    ferry_edges["startNode"] = ferry_edges["startNode"].astype(str)
    ferry_edges["endNode"] = ferry_edges["endNode"].astype(str)

    ferry_nodes = cudf.from_pandas(ferry_nodes[["node_id", "easting", "northing"]])
    ferry_edges = cudf.from_pandas(
        ferry_edges[["startNode", "endNode", "length", "time_weighted"]]
    )
    return ferry_nodes, ferry_edges
84+
85+
86+
def change_ferry_nodes(nodes_df, fnodes, fedges):
    """Attach ferry nodes and edges to their nearest road nodes.

    Each ferry node is matched to the single nearest row of ``nodes_df`` by
    ``easting``/``northing``. The ferry edges' ``startNode`` and ``endNode``
    values are then replaced by the matched road ``node_id``.

    Args:
        nodes_df: Road nodes with ``node_id``, ``easting``, ``northing``.
        fnodes: Ferry nodes with ``node_id``, ``easting``, ``northing``.
            A ``road_id`` column is added to this frame in place.
        fedges: Ferry edges with ``startNode`` and ``endNode`` holding ferry
            ``node_id`` values.

    Returns:
        A ``(fnodes, fedges)`` tuple. ``fnodes`` has columns ``node_id``
        (the matched road node id), ``easting``, ``northing`` (still the
        ferry node's own coordinates). ``fedges`` has its endpoints
        expressed as road node ids.
    """
    nbrs = NearestNeighbors(n_neighbors=1, output_type="cudf", algorithm="brute").fit(
        nodes_df[["easting", "northing"]]
    )
    _, indices = nbrs.kneighbors(fnodes[["easting", "northing"]])
    fnodes["road_id"] = nodes_df.iloc[indices]["node_id"].reset_index(drop=True)

    ferry_to_road = fnodes[["node_id", "road_id"]]
    for endpoint in ("startNode", "endNode"):
        # Drop the ferry-id endpoint column before renaming ``road_id`` onto
        # it, so the frame never holds two columns with the same name.
        fedges = (
            fedges.merge(ferry_to_road, left_on=endpoint, right_on="node_id")
            .drop([endpoint, "node_id"], axis=1)
            .rename(columns={"road_id": endpoint})
        )

    fnodes = fnodes[["road_id", "easting", "northing"]].rename(
        columns={"road_id": "node_id"}
    )
    return fnodes, fedges
100117

101118

102119
if __name__ == "__main__":
103120
logger.info("Starting OS highways processing...")
104121

105-
NUM_EDGES = 5_062_741
106-
NUM_NODES = 4_289_045
107-
108-
# edges processing
109-
edges = cudf.DataFrame()
110-
for n in track(range(0, NUM_EDGES, 100_000), description="Processing edges..."):
111-
subset_edges = gpd.read_file(
122+
edges = cudf.from_pandas(
123+
gpd.read_file(
112124
Config.HW_DATA,
113125
layer="RoadLink",
114-
rows=slice(n, n + 100_000),
115126
ignore_geometry=True,
116127
).pipe(process_edges)
117-
edges: cudf.DataFrame = edges.append(cudf.from_pandas(subset_edges))
118-
ferry_edges = cudf.from_pandas(
119-
gpd.read_file(Config.HW_DATA, layer="FerryLink", ignore_geometry=True)
120-
)[["startNode", "endNode", "SHAPE_Length"]].rename(
121-
columns={"SHAPE_Length": "length"}
122-
)
123-
ferry_edges = ferry_edges.assign(
124-
time_weighted=(ferry_edges["length"].astype(float) / 1000) / 25 * 1.609344 * 60
125128
)
126-
logger.debug("Edges processed.")
127-
128-
# nodes processing
129-
nodes = cudf.DataFrame()
130-
for n in track(range(0, NUM_NODES, 100_000), description="Processing nodes..."):
131-
subset_nodes = gpd.read_file(
132-
Config.HW_DATA,
133-
layer="RoadNode",
134-
rows=slice(n, n + 100_000),
135-
)
136-
subset_nodes["easting"], subset_nodes["northing"] = (
137-
subset_nodes.geometry.x.astype("int"),
138-
subset_nodes.geometry.y.astype("int"),
139-
)
140-
subset_nodes.drop("geometry", axis=1, inplace=True)
141-
nodes: cudf.DataFrame = nodes.append(cudf.from_pandas(subset_nodes))
142-
ferry_nodes = gpd.read_file(Config.HW_DATA, layer="FerryNode")[["TOID", "geometry"]]
143-
ferry_nodes["easting"], ferry_nodes["northing"] = (
144-
ferry_nodes.geometry.x.astype("int"),
145-
ferry_nodes.geometry.y.astype("int"),
129+
nodes = gpd.read_file(Config.HW_DATA, layer="RoadNode")
130+
nodes["easting"], nodes["northing"] = nodes.geometry.x, nodes.geometry.y
131+
nodes = cudf.from_pandas(
132+
nodes[["id", "easting", "northing"]].rename(columns={"id": "node_id"})
146133
)
147-
ferry_nodes = cudf.from_pandas(ferry_nodes.drop("geometry", axis=1))
148-
logger.debug("Nodes processed.")
134+
ferry = gpd.read_file(
135+
"./data/raw/os_highways/strtgi_essh_gb/ferry_line.shp",
136+
)[["FERRY_FROM", "FERRY_TO", "geometry"]]
137+
ferry_nodes, ferry_edges = process_ferry(ferry)
138+
139+
# the ferry line data has no Isles of Scilly route, so add the nodes and edges by hand
140+
extra_ferry_nodes = {
141+
"node_id": ["scilly", "penz"],
142+
"easting": [90139, 147432],
143+
"northing": [10633, 30086],
144+
}
145+
extra_ferry_edges = {
146+
"startNode": ["penz", "scilly"],
147+
"endNode": ["scilly", "penz"],
148+
"length": [165, 165],
149+
"time_weighted": [165, 165],
150+
}
151+
ferry_nodes = ferry_nodes.append(extra_ferry_nodes, ignore_index=True)
152+
ferry_edges = ferry_edges.append(extra_ferry_edges, ignore_index=True)
149153

150154
ferry_nodes, ferry_edges = change_ferry_nodes(nodes, ferry_nodes, ferry_edges)
151155

152-
nodes = nodes[["TOID", "easting", "northing"]].append(ferry_nodes)
156+
nodes = nodes[["node_id", "easting", "northing"]].append(ferry_nodes)
153157
edges = edges.reset_index(drop=True).append(ferry_edges)
154-
nodes = nodes.rename(columns={"TOID": "node_id"})
155-
nodes = nodes[
156-
(nodes["node_id"].isin(edges["startNode"]))
157-
| (nodes["node_id"].isin(edges["endNode"]))
158-
]
158+
159+
# convert to sequential ints
159160
nodes["node_id"] = nodes["node_id"].astype("category")
160161
node_ids = dict(enumerate(nodes["node_id"].cat.categories.to_pandas()))
161162
node_ids = {v: k for k, v in node_ids.items()}

dvc.lock

+61-55
Original file line numberDiff line numberDiff line change
@@ -22,91 +22,94 @@ stages:
2222
cmd: python -m ahah.get_nhs
2323
outs:
2424
- path: data/raw/nhs/edispensary.csv
25-
md5: 08b0917beed22ab5d22c4ae21d0d7b8e
26-
size: 3614340
25+
md5: 125afafb0b65f74b8d842ccc4f678800
26+
size: 3632947
2727
- path: data/raw/nhs/egdpprac.csv
28-
md5: 4dc17037a5f685d23bb84cd52235f486
29-
size: 1853765
28+
md5: ea8db3c8e63431cd07c12a45ef40c2bf
29+
size: 1860481
3030
- path: data/raw/nhs/epraccur.csv
31-
md5: 56822f6bf35907c4775be4f343072f1f
32-
size: 3245662
31+
md5: 257c92fb73edd00fcb03f77a719d74e7
32+
size: 3266982
3333
- path: data/raw/nhs/ets.csv
34-
md5: fe02d5c044ca21250894279ec493c0c4
35-
size: 5961248
34+
md5: 458ca420894fa5ad18486a7942724dbe
35+
size: 6021008
3636
- path: data/raw/nhs/scotland/dentists.csv
37-
md5: 4f02f592ca21a916a55b40e4f60acd2c
38-
size: 124458
37+
md5: 053f9422d7f41c4972a649fbe761c703
38+
size: 124059
3939
- path: data/raw/nhs/scotland/gpp.csv
4040
md5: 4e1bc2658cd6bdce116b21f13ac8a0ee
4141
size: 167311
4242
- path: data/raw/nhs/scotland/hospitals.csv
4343
md5: 8e995fe4d4517c8f72b9196253a38981
4444
size: 38345
4545
- path: data/raw/nhs/scotland/pharmacies.csv
46-
md5: bda581caf6613b94585caa26ec525930
47-
size: 156427
46+
md5: 67967e5bd751910f4eb16ac1ba80c3cb
47+
size: 156110
48+
- path: data/raw/nhs/wales/pharmacy.xls
49+
md5: e50d25cef0f2a750f55159f1ad1428f6
50+
size: 2699264
4851
process_routing:
4952
cmd: python -m ahah.process_routing
5053
deps:
54+
- path: ./data/raw/onspd/ONSPD_FEB_2022_UK.csv
55+
md5: d96643019bd1b74c823dc7e99827bc1a
56+
size: 1348959941
5157
- path: data/processed/osm/nodes.parquet
52-
md5: c34e65013ee63e0618c4d5c444242dc2
53-
size: 54677314
58+
md5: f87669ec9a720324625ee3ff9c0b900d
59+
size: 46851913
5460
- path: data/raw/bluespace
55-
md5: 47aa3cd33c5f77473514cdaf7cecec2d.dir
56-
size: 2056885894
57-
nfiles: 1076
61+
md5: 809b6517d55e952fb50a12a111ad7afc.dir
62+
size: 1168390324
63+
nfiles: 428
5864
- path: data/raw/greenspace/access.shp
5965
md5: 217231f622e8690d1ea1cc3aec341c6b
6066
size: 12760320
6167
- path: data/raw/nhs/edispensary.csv
62-
md5: 08b0917beed22ab5d22c4ae21d0d7b8e
63-
size: 3614340
68+
md5: 125afafb0b65f74b8d842ccc4f678800
69+
size: 3632947
6470
- path: data/raw/nhs/egdpprac.csv
65-
md5: 4dc17037a5f685d23bb84cd52235f486
66-
size: 1853765
71+
md5: ea8db3c8e63431cd07c12a45ef40c2bf
72+
size: 1860481
6773
- path: data/raw/nhs/epraccur.csv
68-
md5: 56822f6bf35907c4775be4f343072f1f
69-
size: 3245662
74+
md5: 257c92fb73edd00fcb03f77a719d74e7
75+
size: 3266982
7076
- path: data/raw/nhs/ets.csv
71-
md5: fe02d5c044ca21250894279ec493c0c4
72-
size: 5961248
77+
md5: 458ca420894fa5ad18486a7942724dbe
78+
size: 6021008
7379
- path: data/raw/nhs/scotland/dentists.csv
74-
md5: 4f02f592ca21a916a55b40e4f60acd2c
75-
size: 124458
80+
md5: 053f9422d7f41c4972a649fbe761c703
81+
size: 124059
7682
- path: data/raw/nhs/scotland/gpp.csv
7783
md5: 4e1bc2658cd6bdce116b21f13ac8a0ee
7884
size: 167311
7985
- path: data/raw/nhs/scotland/hospitals.csv
8086
md5: 8e995fe4d4517c8f72b9196253a38981
8187
size: 38345
8288
- path: data/raw/nhs/scotland/pharmacies.csv
83-
md5: bda581caf6613b94585caa26ec525930
84-
size: 156427
85-
- path: data/raw/onspd/postcodes.csv
86-
md5: 64cbe97288ce9f9558e1cae3c2f96dba
87-
size: 1340976289
89+
md5: 67967e5bd751910f4eb16ac1ba80c3cb
90+
size: 156110
8891
outs:
8992
- path: data/processed/bluespace.parquet
90-
md5: af6b67ea4997f08615bc5bd4a94a096f
91-
size: 1933958
93+
md5: 5e1a0d606f003ea0585a3bffff87763e
94+
size: 28397281
9295
- path: data/processed/dentists.parquet
93-
md5: 967641f8eb5408d8aff60ada26114825
94-
size: 338547
96+
md5: d21fa5f56290732781dc2a4a27c909ab
97+
size: 44072770
9598
- path: data/processed/gpp.parquet
96-
md5: 3294a608b4f35da68f8f045b76e43987
97-
size: 414799
99+
md5: d709f0cc8cf44e991ebe03abe91fd68d
100+
size: 37044008
98101
- path: data/processed/greenspace.parquet
99-
md5: 327052918391f44fe4da5a2088f0bd04
100-
size: 15818915
102+
md5: 690d25542de3b7f8d3ea7a9d54a87f77
103+
size: 37548024
101104
- path: data/processed/hospitals.parquet
102-
md5: 47f8cdfb0a89a5d77873a9882aeae289
103-
size: 887315
105+
md5: 1531359545c71cc839d28db1c9c408f2
106+
size: 29442902
104107
- path: data/processed/pharmacies.parquet
105-
md5: f660c2f40e614de52b16c66e90253faf
106-
size: 395092
108+
md5: 24b8442ff597a0fd8043bc48fadd0ea6
109+
size: 46420723
107110
- path: data/processed/postcodes.parquet
108-
md5: 2303a1e02665394c0ed3fef736633b05
109-
size: 32569100
111+
md5: ad43c8c1d00422ce0feb9371e44e178c
112+
size: 28855720
110113
process_air:
111114
cmd: python -m ahah.process_air
112115
deps:
@@ -124,8 +127,8 @@ stages:
124127
size: 497942736
125128
outs:
126129
- path: data/out/lsoa_air.csv
127-
md5: fcaa940bf0f6e9a94baab6483c9739b6
128-
size: 1890773
130+
md5: d8670f2e387ccd43bb5e2331de171e27
131+
size: 2120883
129132
greenspace_passive:
130133
cmd: python -m ahah.greenspace_passive
131134
deps:
@@ -142,13 +145,16 @@ stages:
142145
os_highways:
143146
cmd: python -m ahah.os_highways
144147
deps:
145-
- path: data/raw/os_highways/Highways_Data_March19.gdb.zip
146-
md5: 64526f270e1778406689166840da0f1e
147-
size: 2320550286
148+
- path: data/raw/os_highways/oproad_gb.gpkg
149+
md5: a8534588b6a1f6bccf83f046633b50bc
150+
size: 2382544896
151+
- path: data/raw/os_highways/strtgi_essh_gb/ferry_line.shp
152+
md5: 56dd01abf99338e2e0ddd755b87d58c6
153+
size: 57356
148154
outs:
149155
- path: data/processed/osm/edges.parquet
150-
md5: a488e42f22f9b61d8117510291113b5c
151-
size: 103594923
156+
md5: 687a684dda7db423c4cf320acd427f66
157+
size: 45879390
152158
- path: data/processed/osm/nodes.parquet
153-
md5: c34e65013ee63e0618c4d5c444242dc2
154-
size: 54677314
159+
md5: f87669ec9a720324625ee3ff9c0b900d
160+
size: 46851913

0 commit comments

Comments
 (0)