-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathkde.py
138 lines (119 loc) · 4.7 KB
/
kde.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
from typing import Optional
import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from rasterio.crs import CRS
from scipy.spatial import cKDTree
from shapely.geometry import Point
from spatial_kde.kernels import quartic
from spatial_kde.utils import Bounds
def spatial_kernel_density(
points: gpd.GeoDataFrame,
radius: float,
output_path: str,
output_pixel_size: float,
output_driver: str = "GTiff",
weight_col: Optional[str] = None,
scaled: bool = False,
) -> None:
"""Calculate Kernel Density / heatmap from ``points``
.. note:: Distance calculations are planar so care should be taken with data
that is in geographic coordinate systems
Parameters
----------
points : gpd.GeoDataFrame
Input GeoDataFrame of points to generate a KDE from
radius : float
Radius of KDE, same units as the coordinate reference system of ``points``
Sometimes referred to as search radius or bandwidth
output_path : str
Path to write output raster to
output_pixel_size : float
Output cell/pixel size of the created array. Same units as the coordinate
reference system of ``points``
output_driver : str
Output format (driver) used to create image. See also
https://rasterio.readthedocs.io/en/latest/api/rasterio.drivers.html
weight_col : Optional[str], optional
A column in ``points`` to weight the kernel density by, any points that
are NaN in this field will not contribute to the KDE.
If None, the all points will have uniform weight of 1.
scaled : bool
If True will output mathematically scaled values, else will output raw
values.
"""
if weight_col and weight_col not in points.columns:
raise ValueError(f"`{weight_col}` column not found in `points` GeoDataFrame")
if weight_col:
points = points.dropna(subset=[weight_col])
# Get the bounding box extent for the new raster
bounds = Bounds.from_gdf(gdf=points, radius=radius)
# Create x/y coordinate pairs for neighbour calculations on the kd-tree
# this is the "top left" coordinate, not the centre of the pixel
x_mesh, y_mesh = np.meshgrid(
bounds.x_coords(output_pixel_size),
bounds.y_coords(output_pixel_size),
)
# create the coordinate grid of the pixel centre points
xc = x_mesh + (output_pixel_size / 2)
yc = y_mesh + (output_pixel_size / 2)
# stack the coordinates so we can do vectorised nearest neighbour ops
xy = np.column_stack((xc.flatten(), yc.flatten()))
# Create a KDTree for all the points so we can more efficiently do a lookup
# for nearby points when calculating the KDE
kdt = cKDTree(
np.column_stack(
(
points.geometry.apply(lambda g: g.centroid.x).to_numpy(),
points.geometry.apply(lambda g: g.centroid.y).to_numpy(),
)
)
)
# Find all the points on the grid that have neighbours within the search
# radius, these are the non-zero points of the KDE surface
kde_pnts = pd.DataFrame(kdt.query_ball_point(xy, r=radius), columns=["nn"])
# Filter out points that have no neighbours within the search radius
kde_pnts["num"] = kde_pnts.nn.apply(len)
kde_pnts = kde_pnts.query("num > 0").drop(columns=["num"])
# Slow implementation iterating over every pixel / point
ndv = -9999
z_scalar = (ndv + np.zeros_like(xc)).flatten()
# TODO vectorise this
# calculate the KDE value for every point that has neighbours
for row in kde_pnts.itertuples():
centre = Point(xy[row.Index])
distances = [centre.distance(points.at[i, "geometry"]) for i in row.nn]
weights = None
if weight_col:
weights = [points.at[i, weight_col] for i in row.nn]
z_scalar[row.Index] = quartic(
distances=distances,
radius=radius,
weights=weights,
scaled=scaled,
)
# create the output raster
z = z_scalar.reshape(xc.shape)
with rasterio.open(
fp=output_path,
mode="w",
driver=output_driver,
height=z.shape[0],
width=z.shape[1],
count=1,
dtype=z.dtype,
crs=CRS.from_user_input(points.crs),
transform=rasterio.transform.from_bounds(
west=bounds.min_x,
south=bounds.min_y,
east=bounds.max_x,
north=bounds.max_y,
width=z.shape[1],
height=z.shape[0],
),
nodata=ndv,
) as dst:
# numpy arrays start at the "bottom left", whereas rasters are written
# from the "top left", hence flipping the array up-down before writing
dst.write(np.flipud(z), 1)