1
1
import cudf
2
2
import geopandas as gpd
3
-
4
3
import pandas as pd
5
4
from cuml .neighbors .nearest_neighbors import NearestNeighbors
6
5
from pandas import IndexSlice as idx
7
- from rich .progress import track
8
6
9
7
from ahah .common .logger import logger
10
8
from ahah .common .utils import Config
@@ -31,32 +29,17 @@ def process_edges(edges: pd.DataFrame) -> pd.DataFrame:
31
29
b_roads = ["B Road" , "B Road Primary" ]
32
30
33
31
edges ["speed_estimate" ] = - 1
34
- edges = edges .set_index (["formOfWay" , "routeHierarchy " ])
32
+ edges = edges .set_index (["formOfWay" , "roadClassification " ])
35
33
36
34
edges .loc [idx [:, "Motorway" ], "speed_estimate" ] = 67
37
35
edges .loc [idx ["Dual Carriageway" , a_roads ], "speed_estimate" ] = 57
38
36
edges .loc [idx ["Dual Carriageway" , b_roads ], "speed_estimate" ] = 45
39
37
edges .loc [idx ["Single Carriageway" , a_roads + b_roads ], "speed_estimate" ] = 25
40
- edges .loc [idx [:, "Minor Road" ], "speed_estimate" ] = 24
41
- edges .loc [idx [:, "Local Road" ], "speed_estimate" ] = 20
38
+ edges .loc [idx [:, "Unclassified" ], "speed_estimate" ] = 24
42
39
edges .loc [idx ["Roundabout" , :], "speed_estimate" ] = 10
43
40
edges .loc [idx [["Track" , "Layby" ], :], "speed_estimate" ] = 5
44
41
edges .loc [edges ["speed_estimate" ] == - 1 , "speed_estimate" ] = 10
45
42
46
- # Unsure what to keep
47
- # edges = edges.drop(
48
- # index="Traffic Island Link At Junction", level=0, errors="ignore"
49
- # )
50
- # edges = edges.drop(index="Traffic Island Link", level=0, errors="ignore")
51
- # edges = edges.drop(index="Enclosed Traffic Area", level=0, errors="ignore")
52
- # edges = edges.drop(index="Layby", level=0, errors="ignore")
53
- # edges = edges.drop(index="Track", level=0, errors="ignore")
54
- # edges = edges.drop(index="Guided Busway", level=0, errors="ignore")
55
- # edges = edges.drop(index="Restricted Local Access Road", level=1, errors="ignore")
56
- # edges = edges.drop(
57
- # index="Restricted Secondary Access Road", level=1, errors="ignore"
58
- # )
59
-
60
43
edges = edges .assign (
61
44
speed_estimate = edges ["speed_estimate" ] * 1.609344 ,
62
45
time_weighted = (edges ["length" ].astype (float ) / 1000 )
@@ -67,95 +50,113 @@ def process_edges(edges: pd.DataFrame) -> pd.DataFrame:
67
50
return edges [["startNode" , "endNode" , "time_weighted" , "length" ]]
68
51
69
52
70
- def change_ferry_nodes (nodes , fnodes , fedges ):
53
def process_ferry(ferry_df, speed_mph=25):
    """Split ferry line geometries into graph nodes and time-weighted edges.

    Every ferry line contributes its boundary points as nodes and one edge
    joining the first boundary point to the second.  Nodes and edge endpoints
    are identified by the string form of the boundary point, so the same
    location always maps to the same id.

    Args:
        ferry_df: GeoDataFrame of ferry routes with a line ``geometry``
            column.  It is not modified.  Lengths are read directly from the
            geometry, so the CRS is presumably projected in metres (the node
            columns are named easting/northing) -- TODO confirm.
        speed_mph: Assumed ferry speed in miles per hour.

    Returns:
        ``(ferry_nodes, ferry_edges)`` as cuDF DataFrames.  ``ferry_nodes``
        has columns ``node_id``, ``easting``, ``northing``; ``ferry_edges``
        has columns ``startNode``, ``endNode``, ``length``, ``time_weighted``
        (travel time in minutes at ``speed_mph``).
    """
    km_per_mile = 1.609344

    # Work on a copy so the caller's frame does not gain a ``node_id`` column.
    ferry_df = ferry_df.copy()
    # Boundary of each route geometry, kept as a sequence of points per row.
    ferry_df["node_id"] = ferry_df.geometry.boundary.apply(lambda row: row.geoms)

    # One row per distinct boundary point.
    ferry_nodes = (
        ferry_df["node_id"]
        .explode()
        .drop_duplicates()
        .reset_index(drop=True)
        .to_frame()
    )
    ferry_nodes["easting"] = ferry_nodes["node_id"].apply(lambda point: point.x)
    ferry_nodes["northing"] = ferry_nodes["node_id"].apply(lambda point: point.y)
    # Points cannot be moved to the GPU; their string form becomes the id.
    ferry_nodes["node_id"] = ferry_nodes["node_id"].astype(str)

    length = ferry_df.geometry.length.astype(float)
    ferry_edges = pd.DataFrame(
        {
            "startNode": ferry_df["node_id"].apply(lambda ends: ends[0]).astype(str),
            "endNode": ferry_df["node_id"].apply(lambda ends: ends[1]).astype(str),
            "length": length,
            # minutes = km / (km per hour) * 60.  The speed must be converted
            # to km/h *inside* the divisor; ``km / 25 * 1.609344 * 60``
            # multiplies by the conversion factor instead of dividing by it.
            "time_weighted": (length / 1000) / (speed_mph * km_per_mile) * 60,
        }
    )

    return (
        cudf.from_pandas(ferry_nodes[["node_id", "easting", "northing"]]),
        cudf.from_pandas(ferry_edges),
    )
84
+
85
+
86
def _remap_endpoint(fedges, lookup, column):
    """Replace the ferry node ids in ``column`` with their matched road ids."""
    original_columns = list(fedges.columns)
    remapped = (
        fedges.merge(lookup, left_on=column, right_on="node_id")
        # Drop the ferry-side ids *before* renaming, so the frame never holds
        # two columns carrying the same label.
        .drop([column, "node_id"], axis=1)
        .rename(columns={"road_id": column})
    )
    # Restore the caller's column order.
    return remapped[original_columns]


def change_ferry_nodes(nodes_df, fnodes, fedges):
    """Snap ferry nodes onto their nearest road node and rewire ferry edges.

    A one-nearest-neighbour search over ``easting``/``northing`` finds, for
    every ferry node, the closest row of ``nodes_df``.  Ferry edge endpoints
    are then rewritten to use that road node's id.

    Args:
        nodes_df: Road nodes with ``node_id``, ``easting``, ``northing``.
        fnodes: Ferry nodes with ``node_id``, ``easting``, ``northing``.
            Assumed to carry a default 0..n-1 index so the matched ids line
            up row by row -- TODO confirm at call sites.  Not modified.
        fedges: Ferry edges with ``startNode`` and ``endNode`` holding ferry
            node ids.

    Returns:
        ``(fnodes, fedges)``: the ferry nodes relabelled with the matched road
        node id (columns ``node_id``, ``easting``, ``northing``, still at the
        ferry coordinates) and the ferry edges with both endpoints remapped.
        The merges use the default join type, so edges whose endpoint has no
        row in ``fnodes`` are dropped.
    """
    nbrs = NearestNeighbors(n_neighbors=1, output_type="cudf", algorithm="brute").fit(
        nodes_df[["easting", "northing"]]
    )
    _, indices = nbrs.kneighbors(fnodes[["easting", "northing"]])

    # ``assign`` returns a new frame, leaving the caller's ``fnodes`` untouched.
    fnodes = fnodes.assign(
        road_id=nodes_df.iloc[indices]["node_id"].reset_index(drop=True)
    )

    lookup = fnodes[["node_id", "road_id"]]
    fedges = _remap_endpoint(fedges, lookup, "startNode")
    fedges = _remap_endpoint(fedges, lookup, "endNode")

    fnodes = fnodes[["road_id", "easting", "northing"]].rename(
        columns={"road_id": "node_id"}
    )
    return fnodes, fedges
100
117
101
118
102
119
if __name__ == "__main__" :
103
120
logger .info ("Starting OS highways processing..." )
104
121
105
- NUM_EDGES = 5_062_741
106
- NUM_NODES = 4_289_045
107
-
108
- # edges processing
109
- edges = cudf .DataFrame ()
110
- for n in track (range (0 , NUM_EDGES , 100_000 ), description = "Processing edges..." ):
111
- subset_edges = gpd .read_file (
122
+ edges = cudf .from_pandas (
123
+ gpd .read_file (
112
124
Config .HW_DATA ,
113
125
layer = "RoadLink" ,
114
- rows = slice (n , n + 100_000 ),
115
126
ignore_geometry = True ,
116
127
).pipe (process_edges )
117
- edges : cudf .DataFrame = edges .append (cudf .from_pandas (subset_edges ))
118
- ferry_edges = cudf .from_pandas (
119
- gpd .read_file (Config .HW_DATA , layer = "FerryLink" , ignore_geometry = True )
120
- )[["startNode" , "endNode" , "SHAPE_Length" ]].rename (
121
- columns = {"SHAPE_Length" : "length" }
122
- )
123
- ferry_edges = ferry_edges .assign (
124
- time_weighted = (ferry_edges ["length" ].astype (float ) / 1000 ) / 25 * 1.609344 * 60
125
128
)
126
- logger .debug ("Edges processed." )
127
-
128
- # nodes processing
129
- nodes = cudf .DataFrame ()
130
- for n in track (range (0 , NUM_NODES , 100_000 ), description = "Processing nodes..." ):
131
- subset_nodes = gpd .read_file (
132
- Config .HW_DATA ,
133
- layer = "RoadNode" ,
134
- rows = slice (n , n + 100_000 ),
135
- )
136
- subset_nodes ["easting" ], subset_nodes ["northing" ] = (
137
- subset_nodes .geometry .x .astype ("int" ),
138
- subset_nodes .geometry .y .astype ("int" ),
139
- )
140
- subset_nodes .drop ("geometry" , axis = 1 , inplace = True )
141
- nodes : cudf .DataFrame = nodes .append (cudf .from_pandas (subset_nodes ))
142
- ferry_nodes = gpd .read_file (Config .HW_DATA , layer = "FerryNode" )[["TOID" , "geometry" ]]
143
- ferry_nodes ["easting" ], ferry_nodes ["northing" ] = (
144
- ferry_nodes .geometry .x .astype ("int" ),
145
- ferry_nodes .geometry .y .astype ("int" ),
129
+ nodes = gpd .read_file (Config .HW_DATA , layer = "RoadNode" )
130
+ nodes ["easting" ], nodes ["northing" ] = nodes .geometry .x , nodes .geometry .y
131
+ nodes = cudf .from_pandas (
132
+ nodes [["id" , "easting" , "northing" ]].rename (columns = {"id" : "node_id" })
146
133
)
147
- ferry_nodes = cudf .from_pandas (ferry_nodes .drop ("geometry" , axis = 1 ))
148
- logger .debug ("Nodes processed." )
134
+ ferry = gpd .read_file (
135
+ "./data/raw/os_highways/strtgi_essh_gb/ferry_line.shp" ,
136
+ )[["FERRY_FROM" , "FERRY_TO" , "geometry" ]]
137
+ ferry_nodes , ferry_edges = process_ferry (ferry )
138
+
139
+ # For some reason the Isles of Scilly do not have a ferry route, so add one by hand.
140
+ extra_ferry_nodes = {
141
+ "node_id" : ["scilly" , "penz" ],
142
+ "easting" : [90139 , 147432 ],
143
+ "northing" : [10633 , 30086 ],
144
+ }
145
+ extra_ferry_edges = {
146
+ "startNode" : ["penz" , "scilly" ],
147
+ "endNode" : ["scilly" , "penz" ],
148
+ "length" : [165 , 165 ],
149
+ "time_weighted" : [165 , 165 ],
150
+ }
151
+ ferry_nodes = ferry_nodes .append (extra_ferry_nodes , ignore_index = True )
152
+ ferry_edges = ferry_edges .append (extra_ferry_edges , ignore_index = True )
149
153
150
154
ferry_nodes , ferry_edges = change_ferry_nodes (nodes , ferry_nodes , ferry_edges )
151
155
152
- nodes = nodes [["TOID " , "easting" , "northing" ]].append (ferry_nodes )
156
+ nodes = nodes [["node_id " , "easting" , "northing" ]].append (ferry_nodes )
153
157
edges = edges .reset_index (drop = True ).append (ferry_edges )
154
- nodes = nodes .rename (columns = {"TOID" : "node_id" })
155
- nodes = nodes [
156
- (nodes ["node_id" ].isin (edges ["startNode" ]))
157
- | (nodes ["node_id" ].isin (edges ["endNode" ]))
158
- ]
158
+
159
+ # convert to sequential ints
159
160
nodes ["node_id" ] = nodes ["node_id" ].astype ("category" )
160
161
node_ids = dict (enumerate (nodes ["node_id" ].cat .categories .to_pandas ()))
161
162
node_ids = {v : k for k , v in node_ids .items ()}
0 commit comments