From 09c53d6fbb6b44c46b9e88cf9044f7ccd1ebd555 Mon Sep 17 00:00:00 2001 From: Lachlan Grose Date: Thu, 22 Feb 2024 15:43:29 +1100 Subject: [PATCH] fix: Add layer id to structural data using a spatial join (#38) * fix: adding layerid to structural data bug where structure data points were not being assigned to a unit. Uses a spatial join to assign points * style: style fixes by ruff and autoformatting by black * fix: adding layerID to structure_samples Making column name clearer * update sp join with new index * style: style fixes by ruff and autoformatting by black --------- Co-authored-by: AngRodrigues <126531163+ARodrPGN@users.noreply.github.com> Co-authored-by: AngRodrigues Co-authored-by: AngRodrigues --- map2loop/project.py | 10 +++++----- map2loop/sampler.py | 30 +++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/map2loop/project.py b/map2loop/project.py index 778760c2..1e259641 100644 --- a/map2loop/project.py +++ b/map2loop/project.py @@ -368,16 +368,16 @@ def sample_map_data(self): Use the samplers to extract points along polylines or unit boundaries """ self.geology_samples = self.samplers[Datatype.GEOLOGY].sample( - self.map_data.get_map_data(Datatype.GEOLOGY) + self.map_data.get_map_data(Datatype.GEOLOGY), self.map_data ) self.structure_samples = self.samplers[Datatype.STRUCTURE].sample( - self.map_data.get_map_data(Datatype.STRUCTURE) + self.map_data.get_map_data(Datatype.STRUCTURE), self.map_data ) self.fault_samples = self.samplers[Datatype.FAULT].sample( - self.map_data.get_map_data(Datatype.FAULT) + self.map_data.get_map_data(Datatype.FAULT), self.map_data ) self.fold_samples = self.samplers[Datatype.FOLD].sample( - self.map_data.get_map_data(Datatype.FOLD) + self.map_data.get_map_data(Datatype.FOLD), self.map_data ) def extract_geology_contacts(self): @@ -667,7 +667,7 @@ def save_into_projectfile(self): # Save structural information observations = numpy.zeros(len(self.structure_samples), LPF.stratigraphicObservationType) observations["layer"] = "s0" - observations["layerId"] = self.structure_samples["ID"] + observations["layerId"] = self.structure_samples["layerID"] observations["easting"] = self.structure_samples["X"] observations["northing"] = self.structure_samples["Y"] # observations["altitude"] = self.structure_samples["Z"] diff --git a/map2loop/sampler.py b/map2loop/sampler.py index ca5c6f66..73ad0876 100644 --- a/map2loop/sampler.py +++ b/map2loop/sampler.py @@ -4,6 +4,9 @@ import pandas import shapely import numpy +from .m2l_enums import Datatype +from .mapdata import MapData +from typing import Optional class Sampler(ABC): @@ -31,12 +34,14 @@ def type(self): @beartype.beartype @abstractmethod - def sample(self, map_data: geopandas.GeoDataFrame) -> pandas.DataFrame: + def sample( + self, spatial_data: geopandas.GeoDataFrame, map_data: Optional[MapData] = None + ) -> pandas.DataFrame: """ Execute sampling method (abstract method) Args: - map_data (geopandas.GeoDataFrame): data frame to sample + spatial_data (geopandas.GeoDataFrame): data frame to sample Returns: pandas.DataFrame: data frame containing samples @@ -64,19 +69,24 @@ def __init__(self, decimation: int = 1): self.decimation = decimation @beartype.beartype - def sample(self, map_data: geopandas.GeoDataFrame) -> pandas.DataFrame: + def sample( + self, spatial_data: geopandas.GeoDataFrame, map_data: Optional[MapData] = None + ) -> pandas.DataFrame: """ Execute sample method takes full point data, samples the data and returns the decimated points Args: - map_data (geopandas.GeoDataFrame): the data frame to sample + spatial_data (geopandas.GeoDataFrame): the data frame to sample Returns: pandas.DataFrame: the sampled data points """ - data = map_data.copy() + data = spatial_data.copy() data["X"] = data.geometry.x data["Y"] = data.geometry.y + data["layerID"] = geopandas.sjoin( + data, map_data.get_map_data(Datatype.GEOLOGY), how='left' + )['index_right'] data.reset_index(drop=True, inplace=True) return pandas.DataFrame(data[:: self.decimation].drop(columns="geometry")) @@ -102,19 +112,21 @@ def __init__(self, spacing: float = 50.0): self.spacing = spacing @beartype.beartype - def sample(self, map_data: geopandas.GeoDataFrame) -> pandas.DataFrame: + def sample( + self, spatial_data: geopandas.GeoDataFrame, map_data: Optional[MapData] = None + ) -> pandas.DataFrame: """ Execute sample method takes full point data, samples the data and returns the sampled points Args: - map_data (geopandas.GeoDataFrame): the data frame to sample (must contain column ["ID"]) + spatial_data (geopandas.GeoDataFrame): the data frame to sample (must contain column ["ID"]) Returns: pandas.DataFrame: the sampled data points """ schema = {"ID": str, "X": float, "Y": float} df = pandas.DataFrame(columns=schema.keys()).astype(schema) - for _, row in map_data.iterrows(): + for _, row in spatial_data.iterrows(): if type(row.geometry) is shapely.geometry.multipolygon.MultiPolygon: targets = row.geometry.boundary.geoms elif type(row.geometry) is shapely.geometry.polygon.Polygon: @@ -133,7 +145,7 @@ def sample(self, map_data: geopandas.GeoDataFrame) -> pandas.DataFrame: points = [target.interpolate(distance) for distance in distances] df2["X"] = [point.x for point in points] df2["Y"] = [point.y for point in points] - if "ID" in map_data.columns: + if "ID" in spatial_data.columns: df2["ID"] = row["ID"] else: df2["ID"] = 0