diff --git a/.gitignore b/.gitignore index 4708faf..a2e7dc8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,4 @@ /target /local/postgresql/data *.DS_Store -test_files/osopenusrn_202410.gpkg -test_files/codepo_gb.gpkg +test_files diff --git a/README.md b/README.md index 26ab762..f350d47 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,5 @@ This Rust library uses DuckDB and serves as a data transformation layer in the G The plan is to have this take in both a file path and a UUID to be used as a table name. Changes will be made to ensure that the library improves its overall functionality: -- Account for when geometry columns have different names - such as "geom", "geometry", or something else! -- Handle cases where data may have several geometry columns, not just 1! - Handle many different file formats - xlsx, csv, raster data, etc - Discard rows where there may be errors in the geometry column / ensure the programme doesn't crash when a geometry error is encountered - skip over it and log it instead diff --git a/src/duckdb_load/mod.rs b/src/duckdb_load/mod.rs index 36547cf..f8f98a0 100644 --- a/src/duckdb_load/mod.rs +++ b/src/duckdb_load/mod.rs @@ -53,8 +53,13 @@ impl DuckDBFileProcessor { // Call all the required methods self.create_data_table()?; self.query_and_print_schema()?; - self.transform_crs("4326")?; - self.load_data_postgis()?; + + // Transform geometry columns and store the result + let geom_columns = self.transform_geom_columns()?; + + // Pass the geometry columns to load_data_postgis + self.load_data_postgis(&geom_columns)?; + Ok(()) } @@ -71,7 +76,7 @@ impl DuckDBFileProcessor { match header { b"PK\x03\x04" => Ok(FileType::Excel), b"SQLite format 3\0" => Ok(FileType::Geopackage), - b"\x00\x00\x27\x0A" => Ok(FileType::Shapefile), + [0, 0, 39, 10, ..] => Ok(FileType::Shapefile), b"PAR1" => Ok(FileType::Parquet), _ if header.starts_with(b"{") => { let json_start = std::str::from_utf8(&buffer)?; @@ -165,44 +170,68 @@ impl DuckDBFileProcessor { } } - fn transform_crs(&self, target_crs: &str) -> Result> { - // Get current CRS + fn transform_geom_columns(&self) -> Result, Box> { + let query = "SELECT column_name FROM information_schema.columns WHERE table_name = 'data' AND data_type = 'GEOMETRY'"; + let mut stmt = self.conn.prepare(query)?; + let mut rows = stmt.query([])?; + let mut geom_columns = Vec::new(); + + while let Some(row) = rows.next()? { + let column_name: String = row.get(0)?; + geom_columns.push(column_name); + } + + println!("Geometry columns: {:?}", &geom_columns); + + // Call transform_crs for each geometry column + let target_crs = "4326"; + for column in &geom_columns { + self.transform_crs(column, target_crs)?; + } + + Ok(geom_columns) + } + + fn transform_crs(&self, geom_column: &str, target_crs: &str) -> Result> { let current_crs = self.get_crs_number()?; - println!("Current CRS: {}", current_crs); + println!("Current CRS for column {}: {}", geom_column, current_crs); - // Create two paths for 'match to target crs' and 'no match to target crs' let create_table_query = if current_crs == target_crs { - "CREATE TABLE transformed_data AS SELECT *, - ST_AsText(geom) as geom_wkt - FROM data;" + format!( + "CREATE TABLE transformed_data AS SELECT *, + ST_AsText({}) as {}_wkt + FROM data;", + geom_column, geom_column + ) } else { - &format!( + format!( "CREATE TABLE transformed_data AS SELECT *, - ST_AsText(ST_Transform(geom, 'EPSG:{}', 'EPSG:{}', always_xy := true)) AS geom_wkt + ST_AsText(ST_Transform({}, 'EPSG:{}', 'EPSG:{}', always_xy := true)) AS {}_wkt FROM data;", - current_crs, target_crs + geom_column, current_crs, target_crs, geom_column ) }; - // Excecute query and drop original geometry column - self.conn.execute(create_table_query, [])?; - self.conn - .execute("ALTER TABLE transformed_data DROP COLUMN geom;", [])?; + self.conn.execute(&create_table_query, [])?; + self.conn.execute( + &format!("ALTER TABLE transformed_data DROP COLUMN {};", geom_column), + [], + )?; if current_crs == target_crs { Ok(format!( - "CRS is already {}. Geometry converted to WKT and original geom column dropped.", - target_crs + "CRS for column {} is already {}. Geometry converted to WKT and original geom column dropped.", + geom_column, target_crs )) } else { Ok(format!( - "Transformation from EPSG:{} to EPSG:{} completed. Geometry converted to WKT and original geom column dropped.", - current_crs, target_crs + "Transformation of column {} from EPSG:{} to EPSG:{} completed. Geometry converted to WKT and original geom column dropped.", + geom_column, current_crs, target_crs )) } } - fn load_data_postgis(&self) -> Result<(), Box> { + fn load_data_postgis(&self, geom_columns: &[String]) -> Result<(), Box> { // Attach Postgres DB instance self.conn.execute( "ATTACH 'dbname=gridwalk user=admin password=password host=localhost port=5432' AS gridwalk_db (TYPE POSTGRES)", @@ -220,19 +249,32 @@ impl DuckDBFileProcessor { ); self.conn.execute(create_table_query, [])?; + // Construct PostGIS query for each geometry column + let mut postgis_queries = Vec::new(); + for geom_column in geom_columns { + postgis_queries.push(format!( + "ALTER TABLE {} ADD COLUMN {} geometry; + UPDATE {} SET {} = ST_GeomFromText({}_wkt, 4326); + ALTER TABLE {} DROP COLUMN {}_wkt;", + self.table_name, + geom_column, + self.table_name, + geom_column, + geom_column, + self.table_name, + geom_column + )); + } + let postgis_query = &format!( - "CALL postgres_execute('gridwalk_db', ' - ALTER TABLE {} ADD COLUMN geom geometry; - UPDATE {} SET geom = ST_GeomFromText(geom_wkt, 4326); - ALTER TABLE {} DROP COLUMN geom_wkt; - ');", - self.table_name, self.table_name, self.table_name + "CALL postgres_execute('gridwalk_db', '{}');", + postgis_queries.join("\n") ); self.conn.execute(postgis_query, [])?; println!( - "Table {} created and data inserted successfully", - self.table_name + "Table {} created and data inserted successfully with geometry columns: {:?}", + self.table_name, geom_columns ); Ok(()) } diff --git a/src/main.rs b/src/main.rs index b04b814..ca8e2c7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,6 @@ mod duckdb_load; use duckdb_load::launch_process_file; fn main() -> Result<(), Box> { - launch_process_file("test_files/GLA_High_Street_boundaries.gpkg", "my_table")?; + launch_process_file("test_files/2011 Greenbelt/GreenBelt2011.shp", "my_table")?; Ok(()) }