Skip to content

Commit

Permalink
Bug loading data from DuckDB to PostGis
Browse files Browse the repository at this point in the history
  • Loading branch information
CHRISCARLON committed Oct 3, 2024
1 parent 761e597 commit 353c041
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 24 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/target
/local/postgresql/data
13 changes: 13 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
services:
geodatabase:
image: postgis/postgis:16-3.4
container_name: gridwalk-geodatabase
platform: linux/amd64
volumes:
- ./local/postgresql/data:/var/lib/postgresql/data
environment:
POSTGRES_DB: gridwalk
POSTGRES_USER: admin
POSTGRES_PASSWORD: password
ports:
- "5432:5432"
62 changes: 39 additions & 23 deletions src/duckdb_load/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use duckdb::arrow::record_batch::RecordBatch;
use duckdb::arrow::util::pretty::print_batches;
use duckdb::{Connection, Result};
use std::error::Error;
use std::fs::File;
use std::io::{self, Read};

Expand All @@ -16,7 +15,7 @@ pub enum FileType {
}

// Determine the file type that is being processed
pub fn determine_file_type(file_content: &[u8]) -> io::Result<FileType> {
fn determine_file_type(file_content: &[u8]) -> io::Result<FileType> {
let header = &file_content[0..16.min(file_content.len())];
if &header[0..4] == b"PK\x03\x04" {
Ok(FileType::Excel)
Expand Down Expand Up @@ -57,36 +56,46 @@ pub fn determine_file_type(file_content: &[u8]) -> io::Result<FileType> {
}

// Get data schema
fn query_and_print_schema(conn: &Connection, query: &str, limit: usize) -> Result<()> {
let mut stmt = conn.prepare(&format!("{} LIMIT {}", query, limit))?;
fn query_and_print_schema(conn: &Connection) -> Result<()> {
let query = "SELECT * FROM data LIMIT 50";
let mut stmt = conn.prepare(query)?;
let arrow_result = stmt.query_arrow([])?;

// Get the schema
let schema = arrow_result.get_schema();
println!("Schema: {:?}", schema);
Ok(())
}

// Collect RecordBatches
let rbs: Vec<RecordBatch> = arrow_result.collect();

// Calculate total number of rows
let total_rows: usize = rbs.iter().map(|rb| rb.num_rows()).sum();

// Print batches
match print_batches(&rbs) {
Ok(_) => println!("Successfully printed {} rows of data.", total_rows),
Err(e) => eprintln!("Error printing batches: {}", e),
}

println!("Total number of rows in the result: {}", total_rows);

fn load_data_postgis(conn: &Connection) -> Result<(), Box<dyn Error>> {
// Attach PostGIS database
conn.execute(
"ATTACH 'dbname=gridwalk user=admin password=password host=localhost port=5432' AS gridwalk_db (TYPE POSTGRES)",
[],
)?;

// Drop the existing table if it exists
conn.execute("DROP TABLE IF EXISTS gridwalk_db.data_1", [])?;

// Create the new table structure
let create_table_query = "
CREATE TABLE gridwalk_db.data_1 AS
SELECT *,
geom::geometry AS geometry
FROM data;
";
conn.execute(create_table_query, [])?;

println!("Table 'data_1' created and data inserted successfully");
Ok(())
}

// DuckDB file loader
pub fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
let conn = Connection::open_in_memory()?;
conn.execute("INSTALL spatial;", [])?;
conn.execute("LOAD spatial;", [])?;
conn.execute("INSTALL postgres;", [])?;
conn.execute("LOAD postgres;", [])?;

let create_table_query = match file_type {
FileType::Geopackage | FileType::Shapefile | FileType::Geojson => {
Expand Down Expand Up @@ -115,10 +124,17 @@ pub fn load_file_duckdb(file_path: &str, file_type: &FileType) -> Result<()> {
}
};

// Create the table in DuckDB
conn.execute(&create_table_query, [])?;

// Call the private function to query and print record batches
query_and_print_schema(&conn, "SELECT * FROM data", 5)?;
// Call to query and print data schema
query_and_print_schema(&conn)?;

// Call to load data into postgres and handle the result
match load_data_postgis(&conn) {
Ok(_) => println!("Data successfully loaded into PostgreSQL"),
Err(e) => eprintln!("Error loading data into PostgreSQL: {}", e),
}

Ok(())
}
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
mod duckdb_load;

fn main() -> Result<(), Box<dyn std::error::Error>> {
let file_path = "test_files/hotosm_twn_populated_places_points_geojson.geojson";
let file_path = "test_files/GLA_High_Street_boundaries.gpkg";
println!("Processing file: {}", file_path);

match duckdb_load::process_file(file_path) {
Expand Down

0 comments on commit 353c041

Please sign in to comment.