From b7dca9ba24435d9861bc04a994a2c1c8cbad5fb0 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Sat, 11 May 2024 03:33:22 -0300 Subject: [PATCH 01/33] First draft on task system usage, still missing job system --- Cargo.lock | 1 + core/Cargo.toml | 8 +- .../heavy-lifting/src/file_identifier/job.rs | 59 +- .../heavy-lifting/src/file_identifier/mod.rs | 1 + core/crates/heavy-lifting/src/indexer/job.rs | 61 +- .../heavy-lifting/src/job_system/job.rs | 226 +-- .../heavy-lifting/src/job_system/mod.rs | 66 +- .../heavy-lifting/src/job_system/runner.rs | 71 +- .../heavy-lifting/src/job_system/store.rs | 29 +- core/crates/heavy-lifting/src/lib.rs | 9 +- .../helpers/exif_media_data.rs | 150 +- .../helpers/ffmpeg_media_data.rs | 242 ++- .../src/media_processor/helpers/mod.rs | 9 + .../media_processor/helpers/thumbnailer.rs | 319 +++- .../heavy-lifting/src/media_processor/job.rs | 121 +- .../heavy-lifting/src/media_processor/mod.rs | 20 +- .../src/media_processor/shallow.rs | 9 +- .../tasks/media_data_extractor.rs | 96 +- .../src/media_processor/tasks/thumbnailer.rs | 228 +-- core/crates/prisma-helpers/src/lib.rs | 5 +- core/src/api/ephemeral_files.rs | 8 +- core/src/api/files.rs | 19 +- core/src/api/jobs.rs | 16 +- core/src/api/labels.rs | 7 +- core/src/api/locations.rs | 20 +- core/src/api/mod.rs | 3 +- core/src/api/nodes.rs | 11 +- core/src/api/search/mod.rs | 4 +- core/src/api/utils/invalidate.rs | 13 + core/src/context.rs | 205 +++ core/src/custom_uri/mod.rs | 2 +- core/src/lib.rs | 59 +- core/src/library/library.rs | 4 +- core/src/location/indexer/mod.rs | 4 +- core/src/location/indexer/old_indexer_job.rs | 1320 ++++++++--------- core/src/location/indexer/old_shallow.rs | 394 ++--- core/src/location/manager/watcher/utils.rs | 143 +- core/src/location/mod.rs | 6 +- core/src/location/non_indexed.rs | 39 +- core/src/node/config.rs | 4 +- core/src/object/media/mod.rs | 552 +++---- core/src/object/media/old_thumbnail/mod.rs | 397 +++-- core/src/object/mod.rs | 4 +- core/src/object/old_file_identifier/mod.rs | 6 +- core/src/old_job/error.rs | 12 +- core/src/old_job/manager.rs | 18 +- core/src/old_job/mod.rs | 10 +- crates/ai/src/old_image_labeler/process.rs | 4 +- 48 files changed, 2915 insertions(+), 2099 deletions(-) create mode 100644 core/src/context.rs diff --git a/Cargo.lock b/Cargo.lock index 64ad910be517..9c743a1621d2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9101,6 +9101,7 @@ dependencies = [ "sd-p2p-tunnel", "sd-prisma", "sd-sync", + "sd-task-system", "sd-utils", "serde", "serde-hashkey", diff --git a/core/Cargo.toml b/core/Cargo.toml index 027e0adf95f2..3f3e1ca83da2 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -13,7 +13,11 @@ default = [] # This feature allows features to be disabled when the Core is running on mobile. mobile = [] # This feature controls whether the Spacedrive Core contains functionality which requires FFmpeg. 
-ffmpeg = ["dep:sd-ffmpeg", "sd-core-heavy-lifting/ffmpeg", "sd-media-metadata/ffmpeg"] +ffmpeg = [ + "dep:sd-ffmpeg", + "sd-core-heavy-lifting/ffmpeg", + "sd-media-metadata/ffmpeg", +] heif = ["sd-images/heif"] ai = ["dep:sd-ai"] crypto = ["dep:sd-crypto"] @@ -25,6 +29,7 @@ sd-core-heavy-lifting = { path = "./crates/heavy-lifting" } sd-core-indexer-rules = { path = "./crates/indexer-rules" } sd-core-prisma-helpers = { path = "./crates/prisma-helpers" } sd-core-sync = { path = "./crates/sync" } + # Spacedrive Sub-crates sd-actors = { path = "../crates/actors", version = "0.1.0" } sd-ai = { path = "../crates/ai", optional = true } @@ -47,6 +52,7 @@ sd-p2p-proto = { path = "../crates/p2p/crates/proto" } sd-p2p-tunnel = { path = "../crates/p2p/crates/tunnel" } sd-prisma = { path = "../crates/prisma" } sd-sync = { path = "../crates/sync" } +sd-task-system = { path = "../crates/task-system" } sd-utils = { path = "../crates/utils" } # Workspace dependencies diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 8ae358dee05d..490b43fd7d4d 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -7,7 +7,8 @@ use crate::{ SerializableJob, SerializedTasks, }, utils::sub_path::maybe_get_iso_file_path_from_sub_path, - Error, JobName, LocationScanState, NonCriticalError, OuterContext, ProgressUpdate, UpdateEvent, + Error, JobContext, JobName, LocationScanState, NonCriticalError, OuterContext, ProgressUpdate, + UpdateEvent, }; use sd_core_file_path_helper::IsolatedFilePathData; @@ -72,10 +73,10 @@ impl Hash for FileIdentifier { impl Job for FileIdentifier { const NAME: JobName = JobName::FileIdentifier; - async fn resume_tasks( + async fn resume_tasks( &mut self, dispatcher: &JobTaskDispatcher, - ctx: &impl OuterContext, + ctx: &impl JobContext, SerializedTasks(serialized_tasks): SerializedTasks, ) -> Result<(), Error> { self.pending_tasks_on_resume = dispatcher @@ -112,10 +113,10 @@ impl Job for FileIdentifier { Ok(()) } - async fn run( + async fn run( mut self, dispatcher: JobTaskDispatcher, - ctx: Ctx, + ctx: impl JobContext, ) -> Result { let mut pending_running_tasks = FuturesUnordered::new(); @@ -163,7 +164,7 @@ impl Job for FileIdentifier { if !self.tasks_for_shutdown.is_empty() { return Ok(ReturnStatus::Shutdown( - SerializableJob::::serialize(self).await, + SerializableJob::::serialize(self).await, )); } @@ -215,10 +216,10 @@ impl FileIdentifier { }) } - async fn init_or_resume( + async fn init_or_resume( &mut self, pending_running_tasks: &mut FuturesUnordered>, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Result<(), file_identifier::Error> { // if we don't have any pending task, then this is a fresh job @@ -281,11 +282,11 @@ impl FileIdentifier { /// # Panics /// Will panic if another task type is added in the job, but this function wasn't updated to handle it /// - async fn process_task_output( + async fn process_task_output( &mut self, task_id: TaskId, any_task_output: Box, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Option> { if any_task_output.is::() { @@ -306,7 +307,8 @@ impl FileIdentifier { .downcast::() .expect("just checked"), ctx, - ); + ) + .await; } else { unreachable!("Unexpected task output type: "); } @@ -314,7 +316,7 @@ impl FileIdentifier { None } - async fn process_extract_file_metadata_output( + async fn process_extract_file_metadata_output( &mut 
self, task_id: TaskId, extract_file_metadata::Output { @@ -322,7 +324,7 @@ impl FileIdentifier { extract_metadata_time, errors, }: extract_file_metadata::Output, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Option> { self.metadata.extract_metadata_time += extract_metadata_time; @@ -333,11 +335,13 @@ impl FileIdentifier { ctx.progress(vec![ProgressUpdate::CompletedTaskCount( self.metadata.completed_tasks, - )]); + )]) + .await; None } else { - ctx.progress_msg(format!("Identified {} files", identified_files.len())); + ctx.progress_msg(format!("Identified {} files", identified_files.len())) + .await; let with_priority = self.priority_tasks_ids.remove(&task_id); @@ -358,7 +362,7 @@ impl FileIdentifier { } } - fn process_object_processor_output( + async fn process_object_processor_output( &mut self, task_id: TaskId, object_processor::Output { @@ -370,7 +374,7 @@ impl FileIdentifier { created_objects_count, linked_objects_count, }: object_processor::Output, - ctx: &impl OuterContext, + ctx: &impl JobContext, ) { self.metadata.assign_cas_ids_time += assign_cas_ids_time; self.metadata.fetch_existing_objects_time += fetch_existing_objects_time; @@ -388,7 +392,8 @@ impl FileIdentifier { self.metadata.created_objects_count + self.metadata.linked_objects_count, self.metadata.total_found_orphans )), - ]); + ]) + .await; if self.priority_tasks_ids.remove(&task_id) { ctx.report_update(UpdateEvent::NewIdentifiedObjects { @@ -397,11 +402,11 @@ impl FileIdentifier { } } - async fn dispatch_priority_identifier_tasks( + async fn dispatch_priority_identifier_tasks( &mut self, last_orphan_file_path_id: &mut Option, sub_iso_file_path: &IsolatedFilePathData<'static>, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, pending_running_tasks: &FuturesUnordered>, ) -> Result, file_identifier::Error> { @@ -441,7 +446,8 @@ impl FileIdentifier { "{} files to be identified", self.metadata.total_found_orphans )), - ]); + ]) + .await; let priority_task = dispatcher .dispatch(ExtractFileMetadataTask::new( @@ -460,11 +466,11 @@ impl FileIdentifier { Ok(file_paths_already_identifying) } - async fn dispatch_deep_identifier_tasks( + async fn dispatch_deep_identifier_tasks( &mut self, last_orphan_file_path_id: &mut Option, maybe_sub_iso_file_path: &Option>, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, pending_running_tasks: &FuturesUnordered>, file_paths_already_identifying: &HashSet, @@ -511,7 +517,8 @@ impl FileIdentifier { "{} files to be identified", self.metadata.total_found_orphans )), - ]); + ]) + .await; pending_running_tasks.push( dispatcher @@ -605,7 +612,7 @@ impl From for ReportOutputMetadata { } } -impl SerializableJob for FileIdentifier { +impl SerializableJob for FileIdentifier { async fn serialize(self) -> Result>, rmp_serde::encode::Error> { let Self { location, @@ -657,7 +664,7 @@ impl SerializableJob for FileIdentifier { async fn deserialize( serialized_job: &[u8], - _: &Ctx, + _: &OuterCtx, ) -> Result)>, rmp_serde::decode::Error> { let SaveState { location, diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index b25e0857845c..fdb35795fa09 100644 --- a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -25,6 +25,7 @@ use cas_id::generate_cas_id; pub use job::FileIdentifier; pub use shallow::shallow; + // we break these tasks into chunks of 100 to improve 
performance const CHUNK_SIZE: usize = 100; diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index f2ad3f6e56f1..dd6a0d864666 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -2,14 +2,14 @@ use crate::{ indexer, job_system::{ job::{ - Job, JobName, JobReturn, JobTaskDispatcher, OuterContext, ProgressUpdate, ReturnStatus, + Job, JobContext, JobName, JobReturn, JobTaskDispatcher, ProgressUpdate, ReturnStatus, }, report::ReportOutputMetadata, utils::cancel_pending_tasks, SerializableJob, SerializedTasks, }, utils::sub_path::get_full_path_from_sub_path, - Error, LocationScanState, NonCriticalError, + Error, LocationScanState, NonCriticalError, OuterContext, }; use sd_core_file_path_helper::IsolatedFilePathData; @@ -72,10 +72,10 @@ pub struct Indexer { impl Job for Indexer { const NAME: JobName = JobName::Indexer; - async fn resume_tasks( + async fn resume_tasks( &mut self, dispatcher: &JobTaskDispatcher, - ctx: &impl OuterContext, + ctx: &impl JobContext, SerializedTasks(serialized_tasks): SerializedTasks, ) -> Result<(), Error> { let location_id = self.location.id; @@ -130,10 +130,10 @@ impl Job for Indexer { Ok(()) } - async fn run( + async fn run( mut self, dispatcher: JobTaskDispatcher, - ctx: Ctx, + ctx: impl JobContext, ) -> Result { let mut pending_running_tasks = FuturesUnordered::new(); @@ -149,7 +149,7 @@ impl Job for Indexer { if !self.tasks_for_shutdown.is_empty() { return Ok(ReturnStatus::Shutdown( - SerializableJob::::serialize(self).await, + SerializableJob::::serialize(self).await, )); } @@ -185,7 +185,7 @@ impl Job for Indexer { if !self.tasks_for_shutdown.is_empty() { return Ok(ReturnStatus::Shutdown( - SerializableJob::::serialize(self).await, + SerializableJob::::serialize(self).await, )); } } @@ -295,18 +295,19 @@ impl Indexer { /// # Panics /// Will panic if another task type is added in the job, but this function wasn't updated to handle it /// - async fn process_task_output( + async fn process_task_output( &mut self, task_id: TaskId, any_task_output: Box, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Result>, indexer::Error> { self.metadata.completed_tasks += 1; ctx.progress(vec![ProgressUpdate::CompletedTaskCount( self.metadata.completed_tasks, - )]); + )]) + .await; if any_task_output.is::() { return self @@ -324,14 +325,16 @@ impl Indexer { .downcast::() .expect("just checked"), ctx, - ); + ) + .await; } else if any_task_output.is::() { self.process_update_output( *any_task_output .downcast::() .expect("just checked"), ctx, - ); + ) + .await; } else { unreachable!("Unexpected task output type: "); } @@ -339,7 +342,7 @@ impl Indexer { Ok(Vec::new()) } - async fn process_walk_output( + async fn process_walk_output( &mut self, WalkTaskOutput { to_create, @@ -352,7 +355,7 @@ impl Indexer { mut handles, scan_time, }: WalkTaskOutput, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Result>, indexer::Error> { self.metadata.scan_read_time += scan_time; @@ -451,43 +454,45 @@ impl Indexer { ProgressUpdate::message(format!( "Found {to_create_count} new files and {to_update_count} to update" )), - ]); + ]) + .await; Ok(handles) } - fn process_save_output( + async fn process_save_output( &mut self, SaveTaskOutput { saved_count, save_duration, }: SaveTaskOutput, - ctx: &impl OuterContext, + ctx: &impl JobContext, ) { self.metadata.indexed_count += saved_count; 
self.metadata.db_write_time += save_duration; - ctx.progress_msg(format!("Saved {saved_count} files")); + ctx.progress_msg(format!("Saved {saved_count} files")).await; } - fn process_update_output( + async fn process_update_output( &mut self, UpdateTaskOutput { updated_count, update_duration, }: UpdateTaskOutput, - ctx: &impl OuterContext, + ctx: &impl JobContext, ) { self.metadata.updated_count += updated_count; self.metadata.db_write_time += update_duration; - ctx.progress_msg(format!("Updated {updated_count} files")); + ctx.progress_msg(format!("Updated {updated_count} files")) + .await; } - async fn process_handles( + async fn process_handles( &mut self, pending_running_tasks: &mut FuturesUnordered>, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Option> { while let Some(task) = pending_running_tasks.next().await { @@ -539,10 +544,10 @@ impl Indexer { None } - async fn init_or_resume( + async fn init_or_resume( &mut self, pending_running_tasks: &mut FuturesUnordered>, - ctx: &impl OuterContext, + ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Result<(), indexer::Error> { // if we don't have any pending task, then this is a fresh job @@ -642,7 +647,7 @@ struct SaveState { tasks_for_shutdown_bytes: Option, } -impl SerializableJob for Indexer { +impl SerializableJob for Indexer { async fn serialize(self) -> Result>, rmp_serde::encode::Error> { let Self { location, @@ -710,7 +715,7 @@ impl SerializableJob for Indexer { async fn deserialize( serialized_job: &[u8], - _: &Ctx, + _: &OuterCtx, ) -> Result)>, rmp_serde::decode::Error> { let SaveState { location, diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index 4006481fd80a..b0e5132cfa98 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -11,6 +11,7 @@ use std::{ collections::{hash_map::DefaultHasher, VecDeque}, hash::{Hash, Hasher}, marker::PhantomData, + ops::{Deref, DerefMut}, path::Path, pin::pin, sync::Arc, @@ -80,48 +81,60 @@ pub trait OuterContext: Send + Sync + Clone + 'static { fn sync(&self) -> &Arc; fn invalidate_query(&self, query: &'static str); fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync; - fn progress(&self, updates: Vec); - fn progress_msg(&self, msg: impl Into) { - self.progress(vec![ProgressUpdate::Message(msg.into())]); - } fn report_update(&self, update: UpdateEvent); fn get_data_directory(&self) -> &Path; } +pub trait JobContext: OuterContext { + fn new(report: Report, ctx: OuterCtx) -> Self; + fn progress(&self, updates: Vec) -> impl Future + Send; + fn progress_msg(&self, msg: impl Into) -> impl Future + Send { + let msg = msg.into(); + async move { + self.progress(vec![ProgressUpdate::Message(msg)]).await; + } + } + fn report(&self) -> impl Future + Send> + Send; + fn report_mut(&self) -> impl Future + Send> + Send; + fn get_outer_ctx(&self) -> OuterCtx; +} + pub trait Job: Send + Sync + Hash + 'static { const NAME: JobName; #[allow(unused_variables)] - fn resume_tasks( + fn resume_tasks( &mut self, dispatcher: &JobTaskDispatcher, - ctx: &impl OuterContext, + ctx: &impl JobContext, serialized_tasks: SerializedTasks, ) -> impl Future> + Send { async move { Ok(()) } } - fn run( + fn run( self, dispatcher: JobTaskDispatcher, - ctx: Ctx, + ctx: impl JobContext, ) -> impl Future> + Send; } -pub trait IntoJob +pub trait IntoJob where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + 
OuterCtx: OuterContext, + JobCtx: JobContext, { - fn into_job(self) -> Box>; + fn into_job(self) -> Box>; } -impl IntoJob for J +impl IntoJob for J where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + OuterCtx: OuterContext, + JobCtx: JobContext, { - fn into_job(self) -> Box> { + fn into_job(self) -> Box> { let id = JobId::new_v4(); Box::new(JobHolder { @@ -134,12 +147,13 @@ where } } -impl IntoJob for JobBuilder +impl IntoJob for JobBuilder where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + OuterCtx: OuterContext, + JobCtx: JobContext, { - fn into_job(self) -> Box> { + fn into_job(self) -> Box> { self.build() } } @@ -262,24 +276,26 @@ pub enum JobOutputData { // TODO: Add more types } -pub struct JobBuilder +pub struct JobBuilder where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + OuterCtx: OuterContext, + JobCtx: JobContext, { id: JobId, job: J, report_builder: ReportBuilder, - next_jobs: VecDeque>>, - _ctx: PhantomData, + next_jobs: VecDeque>>, + _ctx: PhantomData, } -impl JobBuilder +impl JobBuilder where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + OuterCtx: OuterContext, + JobCtx: JobContext, { - pub fn build(self) -> Box> { + pub fn build(self) -> Box> { Box::new(JobHolder { id: self.id, job: self.job, @@ -319,7 +335,7 @@ where } #[must_use] - pub fn enqueue_next(mut self, next: impl Job + SerializableJob) -> Self { + pub fn enqueue_next(mut self, next: impl Job + SerializableJob) -> Self { let next_job_order = self.next_jobs.len() + 1; let mut child_job_builder = JobBuilder::new(next).with_parent_id(self.id); @@ -335,26 +351,26 @@ where } } -pub struct JobHolder +pub struct JobHolder where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + OuterCtx: OuterContext, + JobCtx: JobContext, { pub(super) id: JobId, pub(super) job: J, pub(super) report: Report, - pub(super) next_jobs: VecDeque>>, - pub(super) _ctx: PhantomData, + pub(super) next_jobs: VecDeque>>, + pub(super) _ctx: PhantomData, } -pub struct JobHandle { - pub(crate) next_jobs: VecDeque>>, - pub(crate) ctx: Ctx, - pub(crate) report: Report, +pub struct JobHandle> { + pub(crate) next_jobs: VecDeque>>, + pub(crate) ctx: JobCtx, pub(crate) commands_tx: chan::Sender, } -impl JobHandle { +impl> JobHandle { pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> { if self.commands_tx.send(command).await.is_err() { warn!("Tried to send a {command:?} to a job that was already completed"); @@ -392,12 +408,9 @@ impl JobHandle { &mut self, start_time: DateTime, ) -> Result<(), JobSystemError> { - let Self { - next_jobs, - report, - ctx, - .. - } = self; + let Self { next_jobs, ctx, .. } = self; + + let mut report = ctx.report_mut().await; report.status = Status::Running; if report.started_at.is_none() { @@ -436,9 +449,11 @@ impl JobHandle { &mut self, job_return: JobReturn, ) -> Result { - let Self { report, ctx, .. } = self; + let Self { ctx, .. } = self; - let output = JobOutput::prepare_output_and_report(job_return, report); + let mut report = ctx.report_mut().await; + + let output = JobOutput::prepare_output_and_report(job_return, &mut report); report.update(ctx.db()).await?; @@ -446,53 +461,69 @@ impl JobHandle { } pub async fn failed_job(&mut self, e: &Error) -> Result<(), JobSystemError> { - let Self { report, ctx, .. 
} = self; - error!( - "Job failed with a critical error: {e:#?};", - report.id, report.name - ); - - report.status = Status::Failed; - report.critical_error = Some(e.to_string()); - report.completed_at = Some(Utc::now()); + let db = self.ctx.db(); + { + let mut report = self.ctx.report_mut().await; - report.update(ctx.db()).await?; + error!( + "Job failed with a critical error: {e:#?};", + report.id, report.name + ); + + report.status = Status::Failed; + report.critical_error = Some(e.to_string()); + report.completed_at = Some(Utc::now()); + + report.update(db).await?; + } self.command_children(Command::Cancel).await } pub async fn shutdown_pause_job(&mut self) -> Result<(), JobSystemError> { - let Self { report, ctx, .. } = self; - info!( - "Job paused due to system shutdown, we will pause all children jobs", - report.id, report.name - ); + let db = self.ctx.db(); - report.status = Status::Paused; + { + let mut report = self.ctx.report_mut().await; - report.update(ctx.db()).await?; + info!( + "Job paused due to system shutdown, we will pause all children jobs", + report.id, report.name + ); + + report.status = Status::Paused; + + report.update(db).await?; + } self.command_children(Command::Pause).await } pub async fn cancel_job(&mut self) -> Result<(), JobSystemError> { - let Self { report, ctx, .. } = self; - info!( - "Job canceled, we will cancel all children jobs", - report.id, report.name - ); + let db = self.ctx.db(); - report.status = Status::Canceled; - report.completed_at = Some(Utc::now()); + { + let mut report = self.ctx.report_mut().await; - report.update(ctx.db()).await?; + info!( + "Job canceled, we will cancel all children jobs", + report.id, report.name + ); + + report.status = Status::Canceled; + report.completed_at = Some(Utc::now()); + + report.update(db).await?; + } self.command_children(Command::Cancel).await } } #[async_trait::async_trait] -pub trait DynJob: Send + Sync + 'static { +pub trait DynJob>: + Send + Sync + 'static +{ fn id(&self) -> JobId; fn job_name(&self) -> JobName; @@ -501,33 +532,34 @@ pub trait DynJob: Send + Sync + 'static { fn report_mut(&mut self) -> &mut Report; - fn set_next_jobs(&mut self, next_jobs: VecDeque>>); + fn set_next_jobs(&mut self, next_jobs: VecDeque>>); - fn next_jobs(&self) -> &VecDeque>>; + fn next_jobs(&self) -> &VecDeque>>; async fn serialize(self: Box) -> Result>, rmp_serde::encode::Error>; fn dispatch( self: Box, base_dispatcher: BaseTaskDispatcher, - ctx: Ctx, + ctx: OuterCtx, done_tx: chan::Sender<(JobId, Result)>, - ) -> JobHandle; + ) -> JobHandle; fn resume( self: Box, base_dispatcher: BaseTaskDispatcher, - ctx: Ctx, + ctx: OuterCtx, serialized_tasks: Option, done_tx: chan::Sender<(JobId, Result)>, - ) -> JobHandle; + ) -> JobHandle; } #[async_trait::async_trait] -impl DynJob for JobHolder +impl DynJob for JobHolder where - J: Job + SerializableJob, - Ctx: OuterContext, + J: Job + SerializableJob, + OuterCtx: OuterContext, + JobCtx: JobContext, { fn id(&self) -> JobId { self.id @@ -548,11 +580,11 @@ where &mut self.report } - fn set_next_jobs(&mut self, next_jobs: VecDeque>>) { + fn set_next_jobs(&mut self, next_jobs: VecDeque>>) { self.next_jobs = next_jobs; } - fn next_jobs(&self) -> &VecDeque>> { + fn next_jobs(&self) -> &VecDeque>> { &self.next_jobs } @@ -563,12 +595,14 @@ where fn dispatch( self: Box, base_dispatcher: BaseTaskDispatcher, - ctx: Ctx, + ctx: OuterCtx, done_tx: chan::Sender<(JobId, Result)>, - ) -> JobHandle { + ) -> JobHandle { let (commands_tx, commands_rx) = chan::bounded(8); - spawn(to_spawn_job( + 
let ctx = JobCtx::new(self.report, ctx); + + spawn(to_spawn_job::( self.id, self.job, ctx.clone(), @@ -581,7 +615,6 @@ where JobHandle { next_jobs: self.next_jobs, ctx, - report: self.report, commands_tx, } } @@ -589,13 +622,15 @@ where fn resume( self: Box, base_dispatcher: BaseTaskDispatcher, - ctx: Ctx, + ctx: OuterCtx, serialized_tasks: Option, done_tx: chan::Sender<(JobId, Result)>, - ) -> JobHandle { + ) -> JobHandle { let (commands_tx, commands_rx) = chan::bounded(8); - spawn(to_spawn_job( + let ctx = JobCtx::new(self.report, ctx); + + spawn(to_spawn_job::( self.id, self.job, ctx.clone(), @@ -608,16 +643,15 @@ where JobHandle { next_jobs: self.next_jobs, ctx, - report: self.report, commands_tx, } } } -async fn to_spawn_job( +async fn to_spawn_job>( id: JobId, mut job: impl Job, - ctx: Ctx, + ctx: JobCtx, existing_tasks: Option, base_dispatcher: BaseTaskDispatcher, commands_rx: chan::Receiver, @@ -650,7 +684,7 @@ async fn to_spawn_job( let mut msgs_stream = pin!(( commands_rx.map(StreamMessage::Commands), remote_controllers_rx.map(StreamMessage::NewRemoteController), - stream::once(job.run(dispatcher, ctx)).map(StreamMessage::Done), + stream::once(job.run::(dispatcher, ctx)).map(StreamMessage::Done), ) .merge()); diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs index a8b552a70072..140425b72244 100644 --- a/core/crates/heavy-lifting/src/job_system/mod.rs +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -1,10 +1,15 @@ -use crate::Error; +use crate::{Error, JobContext}; use sd_prisma::prisma::location; use sd_task_system::BaseTaskDispatcher; use sd_utils::error::FileIOError; -use std::{cell::RefCell, collections::hash_map::HashMap, path::Path, sync::Arc}; +use std::{ + cell::RefCell, + collections::hash_map::HashMap, + path::{Path, PathBuf}, + sync::Arc, +}; use async_channel as chan; use futures::Stream; @@ -20,7 +25,7 @@ mod runner; mod store; pub mod utils; -use error::JobSystemError; +pub use error::JobSystemError; use job::{IntoJob, Job, JobName, JobOutput, OuterContext}; use runner::{run, JobSystemRunner, RunnerMessage}; use store::{load_jobs, StoredJobEntry}; @@ -38,18 +43,18 @@ pub enum Command { Cancel, } -pub struct JobSystem { - msgs_tx: chan::Sender>, +pub struct JobSystem> { + msgs_tx: chan::Sender>, job_outputs_rx: chan::Receiver<(JobId, Result)>, + store_jobs_file: Arc, runner_handle: RefCell>>, } -impl JobSystem { - pub async fn new( +impl> JobSystem { + pub fn new( base_dispatcher: BaseTaskDispatcher, - data_directory: impl AsRef + Send, - previously_existing_contexts: &HashMap, - ) -> Result { + data_directory: impl AsRef, + ) -> Self { let (job_outputs_tx, job_outputs_rx) = chan::unbounded(); let (job_return_status_tx, job_return_status_rx) = chan::bounded(16); let (msgs_tx, msgs_rx) = chan::bounded(8); @@ -97,18 +102,24 @@ impl JobSystem { } }))); - load_stored_job_entries( - store_jobs_file.as_ref(), - previously_existing_contexts, - &msgs_tx, - ) - .await?; - - Ok(Self { + Self { msgs_tx, job_outputs_rx, + store_jobs_file, runner_handle, - }) + } + } + + pub async fn init( + &self, + previously_existing_contexts: &HashMap, + ) -> Result<(), JobSystemError> { + load_stored_job_entries( + &*self.store_jobs_file, + previously_existing_contexts, + &self.msgs_tx, + ) + .await } /// Checks if *any* of the desired jobs is running for the desired location @@ -164,11 +175,11 @@ impl JobSystem { /// Dispatch a new job to the system /// # Panics /// Panics only happen if internal channels are unexpectedly 
closed - pub async fn dispatch>( + pub async fn dispatch>( &mut self, - job: impl IntoJob + Send, + job: impl IntoJob + Send, location_id: location::id::Type, - ctx: Ctx, + ctx: OuterCtx, ) -> Result { let dyn_job = job.into_job(); let id = dyn_job.id(); @@ -230,12 +241,15 @@ impl JobSystem { /// SAFETY: Due to usage of refcell we lost `Sync` impl, but we only use it to have a shutdown method /// receiving `&self` which is called once, and we also use `try_borrow_mut` so we never panic -unsafe impl Sync for JobSystem {} +unsafe impl> Sync + for JobSystem +{ +} -async fn load_stored_job_entries( +async fn load_stored_job_entries>( store_jobs_file: impl AsRef + Send, - previously_existing_job_contexts: &HashMap, - msgs_tx: &chan::Sender>, + previously_existing_job_contexts: &HashMap, + msgs_tx: &chan::Sender>, ) -> Result<(), JobSystemError> { let store_jobs_file = store_jobs_file.as_ref(); diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index f1ea8f1376c3..59dc555d13e5 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -1,4 +1,4 @@ -use crate::Error; +use crate::{Error, JobContext}; use sd_prisma::prisma::location; use sd_task_system::BaseTaskDispatcher; @@ -35,19 +35,19 @@ use super::{ const JOBS_INITIAL_CAPACITY: usize = 32; const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60); -pub(super) enum RunnerMessage { +pub(super) enum RunnerMessage> { NewJob { id: JobId, location_id: location::id::Type, - dyn_job: Box>, - ctx: Ctx, + dyn_job: Box>, + ctx: OuterCtx, ack_tx: oneshot::Sender>, }, ResumeStoredJob { id: JobId, location_id: location::id::Type, - dyn_job: Box>, - ctx: Ctx, + dyn_job: Box>, + ctx: OuterCtx, serialized_tasks: Option, ack_tx: oneshot::Sender>, }, @@ -64,9 +64,9 @@ pub(super) enum RunnerMessage { Shutdown, } -pub(super) struct JobSystemRunner { +pub(super) struct JobSystemRunner> { base_dispatcher: BaseTaskDispatcher, - handles: HashMap>, + handles: HashMap>, job_hashes: HashMap, job_hashes_by_id: HashMap, running_jobs_by_job_id: HashMap, @@ -76,7 +76,7 @@ pub(super) struct JobSystemRunner { job_outputs_tx: chan::Sender<(JobId, Result)>, } -impl JobSystemRunner { +impl> JobSystemRunner { pub(super) fn new( base_dispatcher: BaseTaskDispatcher, job_return_status_tx: chan::Sender<(JobId, Result)>, @@ -99,8 +99,8 @@ impl JobSystemRunner { &mut self, id: JobId, location_id: location::id::Type, - dyn_job: Box>, - ctx: Ctx, + dyn_job: Box>, + ctx: OuterCtx, maybe_existing_tasks: Option, ) -> Result<(), JobSystemError> { let Self { @@ -149,17 +149,21 @@ impl JobSystemRunner { ) }; - handle.report.status = report::Status::Running; - if handle.report.started_at.is_none() { - handle.report.started_at = Some(start_time); - } + { + let mut report = handle.ctx.report_mut().await; - // If the report doesn't have a created_at date, it's a new report - if handle.report.created_at.is_none() { - handle.report.create(db).await?; - } else { - // Otherwise it can be a job being resumed or a children job that was already been created - handle.report.update(db).await?; + report.status = report::Status::Running; + if report.started_at.is_none() { + report.started_at = Some(start_time); + } + + // If the report doesn't have a created_at date, it's a new report + if report.created_at.is_none() { + report.create(db).await?; + } else { + // Otherwise it can be a job being resumed or a children job that was already been created + report.update(db).await?; + 
} } // Registering children jobs @@ -243,7 +247,7 @@ impl JobSystemRunner { } Ok(ReturnStatus::Shutdown(Ok(Some(serialized_job)))) => { - let name = handle.report.name; + let name = handle.ctx.report().await.name; let Ok(next_jobs) = handle .next_jobs @@ -384,11 +388,11 @@ impl JobSystemRunner { } } -fn try_dispatch_next_job( - handle: &mut JobHandle, +fn try_dispatch_next_job>( + handle: &mut JobHandle, base_dispatcher: BaseTaskDispatcher, (job_hashes, job_hashes_by_id): (&mut HashMap, &mut HashMap), - handles: &mut HashMap>, + handles: &mut HashMap>, job_return_status_tx: chan::Sender<(JobId, Result)>, ) { if let Some(next) = handle.next_jobs.pop_front() { @@ -397,8 +401,11 @@ fn try_dispatch_next_job( if let Entry::Vacant(e) = job_hashes.entry(next_hash) { e.insert(next_id); job_hashes_by_id.insert(next_id, next_hash); - let mut next_handle = - next.dispatch(base_dispatcher, handle.ctx.clone(), job_return_status_tx); + let mut next_handle = next.dispatch( + base_dispatcher, + handle.ctx.get_outer_ctx(), + job_return_status_tx, + ); assert!( next_handle.next_jobs.is_empty(), @@ -415,15 +422,15 @@ fn try_dispatch_next_job( } } -pub(super) async fn run( - mut runner: JobSystemRunner, +pub(super) async fn run>( + mut runner: JobSystemRunner, store_jobs_file: impl AsRef + Send, - msgs_rx: chan::Receiver>, + msgs_rx: chan::Receiver>, job_return_status_rx: chan::Receiver<(JobId, Result)>, ) { - enum StreamMessage { + enum StreamMessage> { ReturnStatus((JobId, Result)), - RunnerMessage(RunnerMessage), + RunnerMessage(RunnerMessage), CleanMemoryTick, } diff --git a/core/crates/heavy-lifting/src/job_system/store.rs b/core/crates/heavy-lifting/src/job_system/store.rs index 8c40c7dc5022..3a0d5a833322 100644 --- a/core/crates/heavy-lifting/src/job_system/store.rs +++ b/core/crates/heavy-lifting/src/job_system/store.rs @@ -1,4 +1,4 @@ -use crate::{file_identifier, indexer, media_processor}; +use crate::{file_identifier, indexer, media_processor, JobContext}; use sd_prisma::prisma::{job, location}; use sd_utils::uuid_to_bytes; @@ -22,7 +22,7 @@ use super::{ #[derive(Debug, Serialize, Deserialize)] pub struct SerializedTasks(pub Vec); -pub trait SerializableJob: 'static +pub trait SerializableJob: 'static where Self: Sized, { @@ -35,7 +35,7 @@ where #[allow(unused_variables)] fn deserialize( serialized_job: &[u8], - ctx: &Ctx, + ctx: &OuterCtx, ) -> impl Future< Output = Result)>, rmp_serde::decode::Error>, > + Send { @@ -57,13 +57,13 @@ pub struct StoredJobEntry { pub(super) next_jobs: Vec, } -pub async fn load_jobs( +pub async fn load_jobs>( entries: Vec, - ctx: &Ctx, + ctx: &OuterCtx, ) -> Result< Vec<( location::id::Type, - Box>, + Box>, Option, )>, JobSystemError, @@ -166,7 +166,7 @@ pub async fn load_jobs( } macro_rules! match_deserialize_job { - ($stored_job:ident, $report:ident, $ctx:ident, $ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{ + ($stored_job:ident, $report:ident, $outer_ctx:ident, $outer_ctx_type:ty, $job_ctx_type:ty, [$($job_type:ty),+ $(,)?]) => {{ let StoredJob { id, name, @@ -175,12 +175,12 @@ macro_rules! match_deserialize_job { match name { - $(<$job_type as Job>::NAME => <$job_type as SerializableJob<$ctx_type>>::deserialize( + $(<$job_type as Job>::NAME => <$job_type as SerializableJob<$outer_ctx_type>>::deserialize( &serialized_job, - $ctx, + $outer_ctx, ).await .map(|maybe_job| maybe_job.map(|(job, tasks)| -> ( - Box>, + Box>, Option ) { ( @@ -200,16 +200,17 @@ macro_rules! 
match_deserialize_job { }}; } -async fn load_job( +async fn load_job>( stored_job: StoredJob, report: Report, - ctx: &Ctx, -) -> Result>, Option)>, JobSystemError> { + ctx: &OuterCtx, +) -> Result>, Option)>, JobSystemError> { match_deserialize_job!( stored_job, report, ctx, - Ctx, + OuterCtx, + JobCtx, [ indexer::job::Indexer, file_identifier::job::FileIdentifier, diff --git a/core/crates/heavy-lifting/src/lib.rs b/core/crates/heavy-lifting/src/lib.rs index b0d18ffd733f..611815c3bbbb 100644 --- a/core/crates/heavy-lifting/src/lib.rs +++ b/core/crates/heavy-lifting/src/lib.rs @@ -44,8 +44,11 @@ pub mod utils; use media_processor::ThumbKey; pub use job_system::{ - job::{IntoJob, JobBuilder, JobName, JobOutput, JobOutputData, OuterContext, ProgressUpdate}, - JobId, JobSystem, + job::{ + IntoJob, JobBuilder, JobContext, JobName, JobOutput, JobOutputData, OuterContext, + ProgressUpdate, + }, + JobId, JobSystem, JobSystemError, }; #[derive(Error, Debug)] @@ -96,7 +99,7 @@ pub enum LocationScanState { #[derive(Debug, Serialize, Type)] pub enum UpdateEvent { - NewThumbnailEvent { + NewThumbnail { thumb_key: ThumbKey, }, NewIdentifiedObjects { diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs index 3fa2c76184f2..fcbd020ab455 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs @@ -1,13 +1,25 @@ use crate::media_processor::{self, media_data_extractor}; +use prisma_client_rust::QueryError; +use sd_core_sync::Manager as SyncManager; + use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS}; use sd_media_metadata::ExifMetadata; -use sd_prisma::prisma::{exif_data, object, PrismaClient}; +use sd_prisma::{ + prisma::{exif_data, object, PrismaClient}, + prisma_sync, +}; +use sd_sync::{option_sync_db_entry, OperationFactory}; +use sd_utils::{chain_optional_iter, uuid_to_bytes}; +use uuid::Uuid; use std::path::Path; +use futures_concurrency::future::TryJoin; use once_cell::sync::Lazy; +use super::from_slice_option_to_option; + pub static AVAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { ALL_IMAGE_EXTENSIONS .iter() @@ -17,6 +29,7 @@ pub static AVAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { .collect() }); +#[must_use] pub const fn can_extract(image_extension: ImageExtension) -> bool { use ImageExtension::{ Avci, Avcs, Avif, Dng, Heic, Heif, Heifs, Hif, Jpeg, Jpg, Png, Tiff, Webp, @@ -27,24 +40,53 @@ pub const fn can_extract(image_extension: ImageExtension) -> bool { ) } -pub fn to_query( - mdi: ExifMetadata, +#[must_use] +fn to_query( + ExifMetadata { + resolution, + date_taken, + location, + camera_data, + artist, + description, + copyright, + exif_version, + }: ExifMetadata, object_id: exif_data::object_id::Type, -) -> exif_data::CreateUnchecked { - exif_data::CreateUnchecked { - object_id, - _params: vec![ - exif_data::camera_data::set(serde_json::to_vec(&mdi.camera_data).ok()), - exif_data::media_date::set(serde_json::to_vec(&mdi.date_taken).ok()), - exif_data::resolution::set(serde_json::to_vec(&mdi.resolution).ok()), - exif_data::media_location::set(serde_json::to_vec(&mdi.location).ok()), - exif_data::artist::set(mdi.artist), - exif_data::description::set(mdi.description), - exif_data::copyright::set(mdi.copyright), - exif_data::exif_version::set(mdi.exif_version), - exif_data::epoch_time::set(mdi.date_taken.map(|x| x.unix_timestamp())), +) -> (Vec<(&'static str, 
rmpv::Value)>, exif_data::Create) { + let (sync_params, db_params) = chain_optional_iter( + [], + [ + option_sync_db_entry!( + serde_json::to_vec(&camera_data).ok(), + exif_data::camera_data + ), + option_sync_db_entry!(serde_json::to_vec(&date_taken).ok(), exif_data::media_date), + option_sync_db_entry!(serde_json::to_vec(&resolution).ok(), exif_data::resolution), + option_sync_db_entry!( + serde_json::to_vec(&location).ok(), + exif_data::media_location + ), + option_sync_db_entry!(artist, exif_data::artist), + option_sync_db_entry!(description, exif_data::description), + option_sync_db_entry!(copyright, exif_data::copyright), + option_sync_db_entry!(exif_version, exif_data::exif_version), + option_sync_db_entry!( + date_taken.map(|x| x.unix_timestamp()), + exif_data::epoch_time + ), ], - } + ) + .into_iter() + .unzip(); + + ( + sync_params, + exif_data::Create { + object: object::id::equals(object_id), + _params: db_params, + }, + ) } pub async fn extract( @@ -62,24 +104,62 @@ pub async fn extract( } pub async fn save( - media_datas: Vec<(ExifMetadata, object::id::Type)>, + exif_datas: impl IntoIterator + Send, db: &PrismaClient, -) -> Result { - db.exif_data() - .create_many( - media_datas - .into_iter() - .map(|(exif_data, object_id)| to_query(exif_data, object_id)) - .collect(), - ) - .skip_duplicates() - .exec() - .await - .map(|created| { - #[allow(clippy::cast_sign_loss)] - { - created as u64 - } + sync: &SyncManager, +) -> Result { + exif_datas + .into_iter() + .map(|(exif_data, object_id, object_pub_id)| async move { + let (sync_params, create) = to_query(exif_data, object_id); + let db_params = create._params.clone(); + + sync.write_ops( + db, + ( + sync.shared_create( + prisma_sync::exif_data::SyncId { + object: prisma_sync::object::SyncId { + pub_id: uuid_to_bytes(object_pub_id), + }, + }, + sync_params, + ), + db.exif_data() + .upsert(exif_data::object_id::equals(object_id), create, db_params) + .select(exif_data::select!({ id })), + ), + ) + .await }) - .map_err(Into::into) + .collect::>() + .try_join() + .await + .map(|created_vec| created_vec.len() as u64) +} + +#[must_use] +pub fn from_prisma_data( + exif_data::Data { + resolution, + media_date, + media_location, + camera_data, + artist, + description, + copyright, + exif_version, + .. 
+ }: exif_data::Data, +) -> ExifMetadata { + ExifMetadata { + camera_data: from_slice_option_to_option(camera_data).unwrap_or_default(), + date_taken: from_slice_option_to_option(media_date).unwrap_or_default(), + resolution: from_slice_option_to_option(resolution).unwrap_or_default(), + location: from_slice_option_to_option(media_location), + artist, + description, + copyright, + exif_version, + } } diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs index 0d1734c223d4..f8ead71170fe 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs @@ -1,5 +1,6 @@ use crate::media_processor::{self, media_data_extractor}; +use sd_core_prisma_helpers::object_with_media_data; use sd_file_ext::extensions::{ AudioExtension, Extension, VideoExtension, ALL_AUDIO_EXTENSIONS, ALL_VIDEO_EXTENSIONS, }; @@ -19,7 +20,7 @@ use sd_prisma::prisma::{ ffmpeg_data, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_codec, ffmpeg_media_program, ffmpeg_media_stream, ffmpeg_media_video_props, object, PrismaClient, }; -use sd_utils::db::ffmpeg_data_field_to_db; +use sd_utils::db::{ffmpeg_data_field_from_db, ffmpeg_data_field_to_db}; use std::{collections::HashMap, path::Path}; @@ -28,6 +29,8 @@ use once_cell::sync::Lazy; use prisma_client_rust::QueryError; use tracing::error; +use super::from_slice_option_to_option; + pub static AVAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { ALL_AUDIO_EXTENSIONS .iter() @@ -44,6 +47,7 @@ pub static AVAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { .collect() }); +#[must_use] pub const fn can_extract_for_audio(audio_extension: AudioExtension) -> bool { use AudioExtension::{ Aac, Adts, Aif, Aiff, Amr, Aptx, Ast, Caf, Flac, Loas, M4a, Mid, Mp2, Mp3, Oga, Ogg, Opus, @@ -63,6 +67,7 @@ pub const fn can_extract_for_audio(audio_extension: AudioExtension) -> bool { ) } +#[must_use] pub const fn can_extract_for_video(video_extension: VideoExtension) -> bool { use VideoExtension::{ Asf, Avi, Avifs, F4v, Flv, Hevc, M2ts, M2v, M4v, Mjpeg, Mkv, Mov, Mp4, Mpe, Mpeg, Mpg, Mts, @@ -101,7 +106,7 @@ pub async fn extract( pub async fn save( ffmpeg_datas: impl IntoIterator + Send, db: &PrismaClient, -) -> Result { +) -> Result { ffmpeg_datas .into_iter() .map( @@ -570,3 +575,236 @@ async fn create_ffmpeg_video_props( .await .map(|_| ()) } + +pub fn from_prisma_data( + object_with_media_data::ffmpeg_data::Data { + formats, + duration, + start_time, + bit_rate, + metadata, + chapters, + programs, + .. 
+ }: object_with_media_data::ffmpeg_data::Data, +) -> FFmpegMetadata { + FFmpegMetadata { + formats: formats.split(',').map(String::from).collect::>(), + duration: duration.map(|duration| { + let duration = ffmpeg_data_field_from_db(&duration); + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + let duration = ((duration >> 32) as i32, duration as u32); + duration + }), + start_time: start_time.map(|start_time| { + let start_time = ffmpeg_data_field_from_db(&start_time); + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + let start_time = ((start_time >> 32) as i32, start_time as u32); + start_time + }), + bit_rate: { + let bit_rate = ffmpeg_data_field_from_db(&bit_rate); + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + let bit_rate = ((bit_rate >> 32) as i32, bit_rate as u32); + bit_rate + }, + chapters: chapters_from_prisma_data(chapters), + programs: programs_from_prisma_data(programs), + metadata: from_slice_option_to_option(metadata).unwrap_or_default(), + } +} + +#[inline] +fn chapters_from_prisma_data(chapters: Vec) -> Vec { + chapters + .into_iter() + .map( + |ffmpeg_media_chapter::Data { + chapter_id, + start, + end, + time_base_den, + time_base_num, + metadata, + .. + }| Chapter { + id: chapter_id, + start: { + let start = ffmpeg_data_field_from_db(&start); + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + let start = ((start >> 32) as i32, start as u32); + start + }, + end: { + let end = ffmpeg_data_field_from_db(&end); + + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + let end = ((end >> 32) as i32, end as u32); + end + }, + time_base_den, + time_base_num, + metadata: from_slice_option_to_option(metadata).unwrap_or_default(), + }, + ) + .collect() +} + +#[inline] +fn programs_from_prisma_data( + programs: Vec, +) -> Vec { + programs + .into_iter() + .map( + |object_with_media_data::ffmpeg_data::programs::Data { + program_id, + name, + metadata, + streams, + .. + }| Program { + id: program_id, + name, + streams: streams_from_prisma_data(streams), + metadata: from_slice_option_to_option(metadata).unwrap_or_default(), + }, + ) + .collect() +} + +fn streams_from_prisma_data( + streams: Vec, +) -> Vec { + streams + .into_iter() + .map( + |object_with_media_data::ffmpeg_data::programs::streams::Data { + stream_id, + name, + aspect_ratio_num, + aspect_ratio_den, + frames_per_second_num, + frames_per_second_den, + time_base_real_den, + time_base_real_num, + dispositions, + metadata, + codec, + .. + }| { + Stream { + id: stream_id, + name, + codec: codec_from_prisma_data(codec), + aspect_ratio_num, + aspect_ratio_den, + frames_per_second_num, + frames_per_second_den, + time_base_real_den, + time_base_real_num, + dispositions: dispositions + .map(|dispositions| { + dispositions + .split(',') + .map(String::from) + .collect::>() + }) + .unwrap_or_default(), + metadata: from_slice_option_to_option(metadata).unwrap_or_default(), + } + }, + ) + .collect() +} + +fn codec_from_prisma_data( + codec: Option, +) -> Option { + codec.map( + |object_with_media_data::ffmpeg_data::programs::streams::codec::Data { + kind, + sub_kind, + tag, + name, + profile, + bit_rate, + audio_props, + video_props, + .. + }| Codec { + kind, + sub_kind, + tag, + name, + profile, + bit_rate, + props: match (audio_props, video_props) { + ( + Some(ffmpeg_media_audio_props::Data { + delay, + padding, + sample_rate, + sample_format, + bit_per_sample, + channel_layout, + .. 
+ }), + None, + ) => Some(Props::Audio(AudioProps { + delay, + padding, + sample_rate, + sample_format, + bit_per_sample, + channel_layout, + })), + ( + None, + Some(ffmpeg_media_video_props::Data { + pixel_format, + color_range, + bits_per_channel, + color_space, + color_primaries, + color_transfer, + field_order, + chroma_location, + width, + height, + aspect_ratio_num, + aspect_ratio_den, + properties, + .. + }), + ) => Some(Props::Video(VideoProps { + pixel_format, + color_range, + bits_per_channel, + color_space, + color_primaries, + color_transfer, + field_order, + chroma_location, + width, + height, + aspect_ratio_num, + aspect_ratio_den, + properties: properties + .map(|dispositions| { + dispositions + .split(',') + .map(String::from) + .collect::>() + }) + .unwrap_or_default(), + })), + _ => None, + }, + }, + ) +} diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/mod.rs b/core/crates/heavy-lifting/src/media_processor/helpers/mod.rs index 4432d19a7a78..70298135541f 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/mod.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/mod.rs @@ -1,3 +1,12 @@ pub mod exif_media_data; pub mod ffmpeg_media_data; pub mod thumbnailer; + +#[must_use] +fn from_slice_option_to_option( + value: Option>, +) -> Option { + value + .map(|x| serde_json::from_slice(&x).ok()) + .unwrap_or_default() +} diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs index 5f2de34e7ba2..20e7164d8e95 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs @@ -1,15 +1,34 @@ -use once_cell::sync::Lazy; +use crate::media_processor::thumbnailer; + +use image::{imageops, DynamicImage, GenericImageView}; use sd_file_ext::extensions::{ DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS, }; #[cfg(feature = "ffmpeg")] use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS}; +use sd_images::{format_image, scale_dimensions, ConvertibleExtension}; +use sd_media_metadata::exif::Orientation; +use sd_utils::error::FileIOError; +use webp::Encoder; -use std::time::Duration; +use std::{ + ops::Deref, + path::{Path, PathBuf}, + str::FromStr, + time::Duration, +}; +use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; use specta::Type; +use tokio::{ + fs, io, + sync::Mutex, + task::spawn_blocking, + time::{sleep, Instant}, +}; +use tracing::{error, trace}; use uuid::Uuid; // Files names constants @@ -28,6 +47,10 @@ pub const TARGET_QUALITY: f32 = 60.0; /// How much time we allow for the thumbnail generation process to complete before we give up. pub const THUMBNAIL_GENERATION_TIMEOUT: Duration = Duration::from_secs(60); +pub fn get_thumbnails_directory(data_directory: impl AsRef) -> PathBuf { + data_directory.as_ref().join(THUMBNAIL_CACHE_DIR_NAME) +} + #[cfg(feature = "ffmpeg")] pub static THUMBNAILABLE_VIDEO_EXTENSIONS: Lazy> = Lazy::new(|| { ALL_VIDEO_EXTENSIONS @@ -68,7 +91,7 @@ pub static ALL_THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { /// This type is used to pass the relevant data to the frontend so it can request the thumbnail. 
/// Tt supports extending the shard hex to support deeper directory structures in the future -#[derive(Debug, Serialize, Deserialize, Type)] +#[derive(Debug, Serialize, Deserialize, Type, Clone)] pub struct ThumbKey { pub shard_hex: String, pub cas_id: String, @@ -87,6 +110,24 @@ impl ThumbKey { }, } } + + #[must_use] + pub fn new_indexed(cas_id: &str, library_id: Uuid) -> Self { + Self { + shard_hex: get_shard_hex(cas_id).to_string(), + cas_id: cas_id.to_string(), + base_directory_str: library_id.to_string(), + } + } + + #[must_use] + pub fn new_ephemeral(cas_id: &str) -> Self { + Self { + shard_hex: get_shard_hex(cas_id).to_string(), + cas_id: cas_id.to_string(), + base_directory_str: String::from(EPHEMERAL_DIR), + } + } } #[derive(Debug, Serialize, Deserialize, Type, Clone, Copy)] @@ -95,6 +136,41 @@ pub enum ThumbnailKind { Indexed(Uuid), } +impl ThumbnailKind { + pub fn compute_path(&self, data_directory: impl AsRef, cas_id: &str) -> PathBuf { + let mut thumb_path = get_thumbnails_directory(data_directory); + match self { + Self::Ephemeral => thumb_path.push(EPHEMERAL_DIR), + Self::Indexed(library_id) => { + thumb_path.push(library_id.to_string()); + } + } + thumb_path.push(get_shard_hex(cas_id)); + thumb_path.push(cas_id); + thumb_path.set_extension(WEBP_EXTENSION); + + thumb_path + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct GenerateThumbnailArgs { + pub extension: String, + pub cas_id: String, + pub path: PathBuf, +} + +impl GenerateThumbnailArgs { + #[must_use] + pub const fn new(extension: String, cas_id: String, path: PathBuf) -> Self { + Self { + extension, + cas_id, + path, + } + } +} + /// The practice of dividing files into hex coded folders, often called "sharding," /// is mainly used to optimize file system performance. File systems can start to slow down /// as the number of files in a directory increases. Thus, it's often beneficial to split @@ -105,18 +181,21 @@ pub enum ThumbnailKind { /// three characters of a the hash, this will give us 4096 (16^3) possible directories, /// named 000 to fff. 
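// Illustrative example (hypothetical cas_id, not part of the original patch):
// for cas_id = "a1b2c3d4", `get_shard_hex` below returns "a1b", so
// `ThumbnailKind::Indexed(library_id).compute_path(data_dir, cas_id)` above
// resolves to roughly
// <data_dir>/<THUMBNAIL_CACHE_DIR_NAME>/<library_id>/a1b/a1b2c3d4.<WEBP_EXTENSION>,
// giving the 16^3 = 4096 possible shard directories described in the doc comment.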
#[inline] +#[must_use] pub fn get_shard_hex(cas_id: &str) -> &str { // Use the first three characters of the hash as the directory name &cas_id[0..3] } #[cfg(feature = "ffmpeg")] +#[must_use] pub const fn can_generate_thumbnail_for_video(video_extension: VideoExtension) -> bool { use VideoExtension::{Hevc, M2ts, M2v, Mpg, Mts, Swf, Ts}; // File extensions that are specifically not supported by the thumbnailer !matches!(video_extension, Mpg | Swf | M2v | Hevc | M2ts | Mts | Ts) } +#[must_use] pub const fn can_generate_thumbnail_for_image(image_extension: ImageExtension) -> bool { use ImageExtension::{ Avif, Bmp, Gif, Heic, Heics, Heif, Heifs, Ico, Jpeg, Jpg, Png, Svg, Webp, @@ -128,8 +207,242 @@ pub const fn can_generate_thumbnail_for_image(image_extension: ImageExtension) - ) } +#[must_use] pub const fn can_generate_thumbnail_for_document(document_extension: DocumentExtension) -> bool { use DocumentExtension::Pdf; matches!(document_extension, Pdf) } + +pub enum GenerationStatus { + Generated, + Skipped, +} + +pub async fn generate_thumbnail( + thumbnails_directory: &Path, + GenerateThumbnailArgs { + extension, + cas_id, + path, + }: &GenerateThumbnailArgs, + kind: &ThumbnailKind, + should_regenerate: bool, +) -> ( + Duration, + Result<(ThumbKey, GenerationStatus), thumbnailer::NonCriticalError>, +) { + trace!("Generating thumbnail for {}", path.display()); + let start = Instant::now(); + + let mut output_path = match kind { + ThumbnailKind::Ephemeral => thumbnails_directory.join(EPHEMERAL_DIR), + ThumbnailKind::Indexed(library_id) => thumbnails_directory.join(library_id.to_string()), + }; + + output_path.push(get_shard_hex(cas_id)); + output_path.push(cas_id); + output_path.set_extension(WEBP_EXTENSION); + + if let Err(e) = fs::metadata(&*output_path).await { + if e.kind() != io::ErrorKind::NotFound { + error!( + "Failed to check if thumbnail exists, but we will try to generate it anyway: {e:#?}" + ); + } + // Otherwise we good, thumbnail doesn't exist so we can generate it + } else if !should_regenerate { + trace!( + "Skipping thumbnail generation for {} because it already exists", + path.display() + ); + return ( + start.elapsed(), + Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Skipped)), + ); + } + + if let Ok(extension) = ImageExtension::from_str(extension) { + if can_generate_thumbnail_for_image(extension) { + if let Err(e) = generate_image_thumbnail(&path, &output_path).await { + return (start.elapsed(), Err(e)); + } + } + } else if let Ok(extension) = DocumentExtension::from_str(extension) { + if can_generate_thumbnail_for_document(extension) { + if let Err(e) = generate_image_thumbnail(&path, &output_path).await { + return (start.elapsed(), Err(e)); + } + } + } + + #[cfg(feature = "ffmpeg")] + { + use crate::media_processor::helpers::thumbnailer::can_generate_thumbnail_for_video; + use sd_file_ext::extensions::VideoExtension; + + if let Ok(extension) = VideoExtension::from_str(extension) { + if can_generate_thumbnail_for_video(extension) { + if let Err(e) = generate_video_thumbnail(&path, &output_path).await { + return (start.elapsed(), Err(e)); + } + } + } + } + + trace!("Generated thumbnail for {}", path.display()); + + ( + start.elapsed(), + Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Generated)), + ) +} + +async fn generate_image_thumbnail( + file_path: impl AsRef + Send, + output_path: impl AsRef + Send, +) -> Result<(), thumbnailer::NonCriticalError> { + let file_path = file_path.as_ref().to_path_buf(); + + let webp = spawn_blocking({ + let file_path = 
file_path.clone(); + + move || -> Result<_, thumbnailer::NonCriticalError> { + let mut img = format_image(&file_path).map_err(|e| { + thumbnailer::NonCriticalError::FormatImage(file_path.clone(), e.to_string()) + })?; + + let (w, h) = img.dimensions(); + + #[allow(clippy::cast_precision_loss)] + let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX); + + // Optionally, resize the existing photo and convert back into DynamicImage + if w != w_scaled && h != h_scaled { + img = DynamicImage::ImageRgba8(imageops::resize( + &img, + w_scaled, + h_scaled, + imageops::FilterType::Triangle, + )); + } + + // this corrects the rotation/flip of the image based on the *available* exif data + // not all images have exif data, so we don't error. we also don't rotate HEIF as that's against the spec + if let Some(orientation) = Orientation::from_path(&file_path) { + if ConvertibleExtension::try_from(file_path.as_ref()) + .expect("we already checked if the image was convertible") + .should_rotate() + { + img = orientation.correct_thumbnail(img); + } + } + + // Create the WebP encoder for the above image + let encoder = Encoder::from_image(&img).map_err(|reason| { + thumbnailer::NonCriticalError::WebPEncoding(file_path, reason.to_string()) + })?; + + // Type `WebPMemory` is !Send, which makes the `Future` in this function `!Send`, + // this make us `deref` to have a `&[u8]` and then `to_owned` to make a `Vec` + // which implies on a unwanted clone... + Ok(encoder.encode(TARGET_QUALITY).deref().to_owned()) + } + }) + .await + .map_err(|e| { + thumbnailer::NonCriticalError::PanicWhileGeneratingThumbnail( + file_path.clone(), + e.to_string(), + ) + })??; + + let output_path = output_path.as_ref(); + + if let Some(shard_dir) = output_path.parent() { + fs::create_dir_all(shard_dir).await.map_err(|e| { + thumbnailer::NonCriticalError::CreateShardDirectory( + FileIOError::from((shard_dir, e)).to_string(), + ) + })?; + } else { + error!( + "Failed to get parent directory of '{}' for sharding parent directory", + output_path.display() + ); + } + + fs::write(output_path, &webp).await.map_err(|e| { + thumbnailer::NonCriticalError::SaveThumbnail( + file_path, + FileIOError::from((output_path, e)).to_string(), + ) + }) +} + +#[cfg(feature = "ffmpeg")] +async fn generate_video_thumbnail( + file_path: impl AsRef + Send, + output_path: impl AsRef + Send, +) -> Result<(), thumbnailer::NonCriticalError> { + use sd_ffmpeg::{to_thumbnail, ThumbnailSize}; + + let file_path = file_path.as_ref(); + + to_thumbnail( + file_path, + output_path, + ThumbnailSize::Scale(1024), + TARGET_QUALITY, + ) + .await + .map_err(|e| { + thumbnailer::NonCriticalError::VideoThumbnailGenerationFailed( + file_path.to_path_buf(), + e.to_string(), + ) + }) +} + +const ONE_SEC: Duration = Duration::from_secs(1); +static LAST_SINGLE_THUMB_GENERATED_LOCK: Lazy> = + Lazy::new(|| Mutex::new(Instant::now())); + +/// WARNING!!!! DON'T USE THIS FUNCTION IN A LOOP!!!!!!!!!!!!! It will be pretty slow on purpose! 
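// Illustrative one-off call (hypothetical variable names, not part of the
// original patch); the mutex-guarded `Instant` below throttles this helper to
// roughly one thumbnail per second, which is why it must not be used in a loop:
//
//     generate_single_thumbnail(
//         &thumbnails_directory,
//         "png".to_string(),
//         cas_id,
//         &file_full_path,
//         ThumbnailKind::Ephemeral,
//     )
//     .await?;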
+pub async fn generate_single_thumbnail(
+	thumbnails_directory: impl AsRef<Path> + Send,
+	extension: String,
+	cas_id: String,
+	path: impl AsRef<Path> + Send,
+	kind: ThumbnailKind,
+) -> Result<(), thumbnailer::NonCriticalError> {
+	let mut last_single_thumb_generated_guard = LAST_SINGLE_THUMB_GENERATED_LOCK.lock().await;
+
+	let elapsed = Instant::now() - *last_single_thumb_generated_guard;
+	if elapsed < ONE_SEC {
+		// This throttles anyone trying to use this method in a loop; otherwise it would
+		// consume all the machine's resources like a glutton monster from hell
+		sleep(ONE_SEC - elapsed).await;
+	}
+
+	let (_duration, res) = generate_thumbnail(
+		thumbnails_directory.as_ref(),
+		&GenerateThumbnailArgs {
+			extension,
+			cas_id,
+			path: path.as_ref().to_path_buf(),
+		},
+		&kind,
+		false,
+	)
+	.await;
+
+	let (_thumb_key, status) = res?;
+
+	if matches!(status, GenerationStatus::Generated) {
+		*last_single_thumb_generated_guard = Instant::now();
+		drop(last_single_thumb_generated_guard); // Clippy was weirdly complaining about not doing an "early" drop here
+	}
+
+	Ok(())
+}
diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs
index a8e22cdb2595..896d0626bcf4 100644
--- a/core/crates/heavy-lifting/src/media_processor/job.rs
+++ b/core/crates/heavy-lifting/src/media_processor/job.rs
@@ -7,10 +7,12 @@ use crate::{
 	},
 	media_processor::{self, helpers::thumbnailer::THUMBNAIL_CACHE_DIR_NAME},
 	utils::sub_path::{self, maybe_get_iso_file_path_from_sub_path},
-	Error, JobName, LocationScanState, OuterContext, ProgressUpdate,
+	Error, JobContext, JobName, LocationScanState, OuterContext, ProgressUpdate,
 };
+
 use sd_core_file_path_helper::IsolatedFilePathData;
 use sd_core_prisma_helpers::file_path_for_media_processor;
+use sd_core_sync::Manager as SyncManager;
 
 use sd_file_ext::extensions::Extension;
 use sd_prisma::prisma::{location, PrismaClient};
@@ -97,13 +99,13 @@ pub struct MediaProcessor {
 impl Job for MediaProcessor {
 	const NAME: JobName = JobName::MediaProcessor;
 
-	async fn resume_tasks(
+	async fn resume_tasks(
 		&mut self,
 		dispatcher: &JobTaskDispatcher,
-		ctx: &impl OuterContext,
+		ctx: &impl JobContext,
 		SerializedTasks(serialized_tasks): SerializedTasks,
 	) -> Result<(), Error> {
-		let reporter = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() });
+		let reporter = NewThumbnailsReporter { ctx: ctx.clone() };
 
 		self.pending_tasks_on_resume = dispatcher
 			.dispatch_many_boxed(
 				rmp_serde::from_slice::)>>(&serialized_tasks)
 					.map_err(media_processor::Error::from)?
.into_iter() .map(|(task_kind, task_bytes)| { - let reporter = Arc::clone(&reporter); + let reporter = reporter.clone(); async move { match task_kind { TaskKind::MediaDataExtractor => { tasks::MediaDataExtractor::deserialize( &task_bytes, - Arc::clone(ctx.db()), + (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), ) .await .map(IntoTask::into_task) } - TaskKind::Thumbnailer => tasks::Thumbnailer::deserialize( - &task_bytes, - Arc::clone(&reporter), - ) - .await - .map(IntoTask::into_task), + TaskKind::Thumbnailer => { + tasks::Thumbnailer::deserialize(&task_bytes, reporter) + .await + .map(IntoTask::into_task) + } } } }) @@ -142,10 +143,10 @@ impl Job for MediaProcessor { Ok(()) } - async fn run( + async fn run( mut self, dispatcher: JobTaskDispatcher, - ctx: Ctx, + ctx: impl JobContext, ) -> Result { let mut pending_running_tasks = FuturesUnordered::new(); @@ -158,7 +159,7 @@ impl Job for MediaProcessor { if !self.tasks_for_shutdown.is_empty() { return Ok(ReturnStatus::Shutdown( - SerializableJob::::serialize(self).await, + SerializableJob::::serialize(self).await, )); } @@ -215,10 +216,10 @@ impl MediaProcessor { }) } - async fn init_or_resume( + async fn init_or_resume( &mut self, pending_running_tasks: &mut FuturesUnordered>, - ctx: &impl OuterContext, + job_ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Result<(), media_processor::Error> { // if we don't have any pending task, then this is a fresh job @@ -230,7 +231,7 @@ impl MediaProcessor { location_id, &self.sub_path, &*self.location_path, - ctx.db(), + job_ctx.db(), ) .await? .map_or_else( @@ -248,7 +249,8 @@ impl MediaProcessor { // First we will dispatch all tasks for media data extraction so we have a nice reporting let (total_media_data_extraction_files, task_handles) = dispatch_media_data_extractor_tasks( - ctx.db(), + job_ctx.db(), + job_ctx.sync(), &iso_file_path, &self.location_path, dispatcher, @@ -258,14 +260,16 @@ impl MediaProcessor { pending_running_tasks.extend(task_handles); - ctx.progress(vec![ - ProgressUpdate::TaskCount(total_media_data_extraction_files), - ProgressUpdate::Phase(self.phase.to_string()), - ProgressUpdate::Message(format!( + job_ctx + .progress(vec![ + ProgressUpdate::TaskCount(total_media_data_extraction_files), + ProgressUpdate::Phase(self.phase.to_string()), + ProgressUpdate::Message(format!( "Preparing to process {total_media_data_extraction_files} files in {} chunks", self.total_media_data_extraction_tasks )), - ]); + ]) + .await; // Now we dispatch thumbnailer tasks let (total_thumbnailer_tasks, task_handles) = dispatch_thumbnailer_tasks( @@ -273,7 +277,7 @@ impl MediaProcessor { self.regenerate_thumbnails, &self.location_path, dispatcher, - ctx, + job_ctx, ) .await?; pending_running_tasks.extend(task_handles); @@ -286,15 +290,15 @@ impl MediaProcessor { Ok(()) } - async fn process_handles( + async fn process_handles( &mut self, pending_running_tasks: &mut FuturesUnordered>, - ctx: &impl OuterContext, + job_ctx: &impl JobContext, ) -> Option> { while let Some(task) = pending_running_tasks.next().await { match task { Ok(TaskStatus::Done((task_id, TaskOutput::Out(out)))) => { - self.process_task_output(task_id, out, ctx); + self.process_task_output(task_id, out, job_ctx).await; } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { @@ -328,11 +332,11 @@ impl MediaProcessor { None } - fn process_task_output( + async fn process_task_output( &mut self, task_id: uuid::Uuid, any_task_output: Box, - ctx: &impl OuterContext, + job_ctx: &impl JobContext, ) { if any_task_output.is::() { let 
media_data_extractor::Output { @@ -360,10 +364,12 @@ impl MediaProcessor { self.metadata.media_data_metrics.total_successful_tasks, self.total_media_data_extraction_tasks ); - ctx.progress(vec![ProgressUpdate::CompletedTaskCount( - self.metadata.media_data_metrics.extracted - + self.metadata.media_data_metrics.skipped, - )]); + job_ctx + .progress(vec![ProgressUpdate::CompletedTaskCount( + self.metadata.media_data_metrics.extracted + + self.metadata.media_data_metrics.skipped, + )]) + .await; if self.total_media_data_extraction_tasks == self.metadata.media_data_metrics.total_successful_tasks @@ -372,14 +378,16 @@ impl MediaProcessor { self.phase = Phase::ThumbnailGeneration; - ctx.progress(vec![ - ProgressUpdate::TaskCount(self.total_thumbnailer_files), - ProgressUpdate::Phase(self.phase.to_string()), - ProgressUpdate::Message(format!( - "Waiting for processing of {} thumbnails in {} tasks", - self.total_thumbnailer_files, self.total_thumbnailer_tasks - )), - ]); + job_ctx + .progress(vec![ + ProgressUpdate::TaskCount(self.total_thumbnailer_files), + ProgressUpdate::Phase(self.phase.to_string()), + ProgressUpdate::Message(format!( + "Waiting for processing of {} thumbnails in {} tasks", + self.total_thumbnailer_files, self.total_thumbnailer_tasks + )), + ]) + .await; } } else if any_task_output.is::() { let thumbnailer::Output { @@ -400,10 +408,12 @@ impl MediaProcessor { self.errors.extend(errors); - ctx.progress(vec![ProgressUpdate::CompletedTaskCount( - self.metadata.thumbnailer_metrics_acc.generated - + self.metadata.thumbnailer_metrics_acc.skipped, - )]); + job_ctx + .progress(vec![ProgressUpdate::CompletedTaskCount( + self.metadata.thumbnailer_metrics_acc.generated + + self.metadata.thumbnailer_metrics_acc.skipped, + )]) + .await; // if self.total_thumbnailer_tasks // == self.metadata.thumbnailer_metrics_acc.total_successful_tasks @@ -419,7 +429,7 @@ impl MediaProcessor { // "Waiting for processing of {} labels in {} tasks", // self.total_labeller_files, self.total_labeller_tasks // )), - // ]); + // ]).await; // } } else { unreachable!("Unexpected task output type: "); @@ -541,7 +551,7 @@ struct SaveState { tasks_for_shutdown_bytes: Option, } -impl SerializableJob for MediaProcessor { +impl SerializableJob for MediaProcessor { async fn serialize(self) -> Result>, rmp_serde::encode::Error> { let Self { location, @@ -578,8 +588,8 @@ impl SerializableJob for MediaProcessor { .serialize() .await .map(|bytes| (TaskKind::MediaDataExtractor, bytes)) - } else if task.is::>>() { - task.downcast::>>() + } else if task.is::>>() { + task.downcast::>>() .expect("just checked") .serialize() .await @@ -599,7 +609,7 @@ impl SerializableJob for MediaProcessor { async fn deserialize( serialized_job: &[u8], - _: &Ctx, + _: &OuterCtx, ) -> Result)>, rmp_serde::decode::Error> { let SaveState { location, @@ -646,6 +656,7 @@ impl Hash for MediaProcessor { async fn dispatch_media_data_extractor_tasks( db: &Arc, + sync: &Arc, parent_iso_file_path: &IsolatedFilePathData<'_>, location_path: &Arc, dispatcher: &JobTaskDispatcher, @@ -678,6 +689,7 @@ async fn dispatch_media_data_extractor_tasks( parent_iso_file_path.location_id(), Arc::clone(location_path), Arc::clone(db), + Arc::clone(sync), ) }) .map(IntoTask::into_task) @@ -693,6 +705,7 @@ async fn dispatch_media_data_extractor_tasks( parent_iso_file_path.location_id(), Arc::clone(location_path), Arc::clone(db), + Arc::clone(sync), ) }) .map(IntoTask::into_task), @@ -752,7 +765,7 @@ async fn dispatch_thumbnailer_tasks( let location_id = 
parent_iso_file_path.location_id(); let library_id = ctx.id(); let db = ctx.db(); - let reporter = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); + let reporter = NewThumbnailsReporter { ctx: ctx.clone() }; let mut file_paths = get_all_children_files_by_extensions( db, @@ -784,7 +797,7 @@ async fn dispatch_thumbnailer_tasks( library_id, should_regenerate, false, - Arc::clone(&reporter), + reporter.clone(), ) }) .map(IntoTask::into_task) @@ -804,7 +817,7 @@ async fn dispatch_thumbnailer_tasks( library_id, should_regenerate, true, - Arc::clone(&reporter), + reporter.clone(), ) }) .map(IntoTask::into_task) diff --git a/core/crates/heavy-lifting/src/media_processor/mod.rs b/core/crates/heavy-lifting/src/media_processor/mod.rs index 7197e686fa63..800763b02a61 100644 --- a/core/crates/heavy-lifting/src/media_processor/mod.rs +++ b/core/crates/heavy-lifting/src/media_processor/mod.rs @@ -19,7 +19,14 @@ pub use tasks::{ thumbnailer::{self, Thumbnailer}, }; -pub use helpers::thumbnailer::{ThumbKey, ThumbnailKind}; +pub use helpers::{ + exif_media_data, ffmpeg_media_data, + thumbnailer::{ + can_generate_thumbnail_for_document, can_generate_thumbnail_for_image, + can_generate_thumbnail_for_video, generate_single_thumbnail, get_shard_hex, + get_thumbnails_directory, GenerateThumbnailArgs, ThumbKey, ThumbnailKind, WEBP_EXTENSION, + }, +}; pub use shallow::shallow; use self::thumbnailer::NewThumbnailReporter; @@ -55,19 +62,20 @@ pub enum NonCriticalError { Thumbnailer(#[from] thumbnailer::NonCriticalError), } -struct NewThumbnailsReporter { - ctx: Ctx, +#[derive(Clone)] +pub struct NewThumbnailsReporter { + pub ctx: OuterCtx, } -impl fmt::Debug for NewThumbnailsReporter { +impl fmt::Debug for NewThumbnailsReporter { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("NewThumbnailsReporter").finish() } } -impl NewThumbnailReporter for NewThumbnailsReporter { +impl NewThumbnailReporter for NewThumbnailsReporter { fn new_thumbnail(&self, thumb_key: ThumbKey) { self.ctx - .report_update(UpdateEvent::NewThumbnailEvent { thumb_key }); + .report_update(UpdateEvent::NewThumbnail { thumb_key }); } } diff --git a/core/crates/heavy-lifting/src/media_processor/shallow.rs b/core/crates/heavy-lifting/src/media_processor/shallow.rs index 2f3cd8322f1b..5e5d1522303d 100644 --- a/core/crates/heavy-lifting/src/media_processor/shallow.rs +++ b/core/crates/heavy-lifting/src/media_processor/shallow.rs @@ -5,6 +5,7 @@ use crate::{ use sd_core_file_path_helper::IsolatedFilePathData; use sd_core_prisma_helpers::file_path_for_media_processor; +use sd_core_sync::Manager as SyncManager; use sd_file_ext::extensions::Extension; use sd_prisma::prisma::{location, PrismaClient}; @@ -67,6 +68,7 @@ pub async fn shallow( let mut futures = dispatch_media_data_extractor_tasks( ctx.db(), + ctx.sync(), &sub_iso_file_path, &location_path, &dispatcher, @@ -120,6 +122,7 @@ pub async fn shallow( async fn dispatch_media_data_extractor_tasks( db: &Arc, + sync: &Arc, parent_iso_file_path: &IsolatedFilePathData<'_>, location_path: &Arc, dispatcher: &BaseTaskDispatcher, @@ -150,6 +153,7 @@ async fn dispatch_media_data_extractor_tasks( parent_iso_file_path.location_id(), Arc::clone(location_path), Arc::clone(db), + Arc::clone(sync), ) }) .map(IntoTask::into_task) @@ -165,6 +169,7 @@ async fn dispatch_media_data_extractor_tasks( parent_iso_file_path.location_id(), Arc::clone(location_path), Arc::clone(db), + Arc::clone(sync), ) }) .map(IntoTask::into_task), @@ -220,7 +225,7 @@ async fn dispatch_thumbnailer_tasks( let 
location_id = parent_iso_file_path.location_id(); let library_id = ctx.id(); let db = ctx.db(); - let reporter = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); + let reporter = NewThumbnailsReporter { ctx: ctx.clone() }; let file_paths = get_files_by_extensions( db, @@ -243,7 +248,7 @@ async fn dispatch_thumbnailer_tasks( library_id, should_regenerate, true, - Arc::clone(&reporter), + reporter.clone(), ) }) .map(IntoTask::into_task) diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs index 4a5f6661f6dc..fe7d713d10e7 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs @@ -8,6 +8,7 @@ use crate::{ use sd_core_file_path_helper::IsolatedFilePathData; use sd_core_prisma_helpers::file_path_for_media_processor; +use sd_core_sync::Manager as SyncManager; use sd_media_metadata::{ExifMetadata, FFmpegMetadata}; use sd_prisma::prisma::{exif_data, ffmpeg_data, file_path, location, object, PrismaClient}; @@ -15,6 +16,8 @@ use sd_task_system::{ check_interruption, ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId, }; +use sd_utils::from_bytes_to_uuid; +use uuid::Uuid; use std::{ collections::{HashMap, HashSet}, @@ -47,6 +50,7 @@ pub struct MediaDataExtractor { location_path: Arc, stage: Stage, db: Arc, + sync: Arc, output: Output, } @@ -55,13 +59,13 @@ enum Stage { Starting, FetchedObjectsAlreadyWithMediaData(Vec), ExtractingMediaData { - paths_by_id: HashMap, - exif_media_datas: Vec<(ExifMetadata, object::id::Type)>, + paths_by_id: HashMap, + exif_media_datas: Vec<(ExifMetadata, object::id::Type, Uuid)>, ffmpeg_media_datas: Vec<(FFmpegMetadata, object::id::Type)>, extract_ids_to_remove_from_map: Vec, }, SaveMediaData { - exif_media_datas: Vec<(ExifMetadata, object::id::Type)>, + exif_media_datas: Vec<(ExifMetadata, object::id::Type, Uuid)>, ffmpeg_media_datas: Vec<(FFmpegMetadata, object::id::Type)>, }, } @@ -73,6 +77,7 @@ impl MediaDataExtractor { location_id: location::id::Type, location_path: Arc, db: Arc, + sync: Arc, ) -> Self { let mut output = Output::default(); @@ -82,7 +87,7 @@ impl MediaDataExtractor { file_paths: file_paths .iter() .filter(|file_path| { - if file_path.object_id.is_some() { + if file_path.object.is_some() { true } else { output.errors.push( @@ -100,6 +105,7 @@ impl MediaDataExtractor { location_path, stage: Stage::Starting, db, + sync, output, } } @@ -110,8 +116,9 @@ impl MediaDataExtractor { location_id: location::id::Type, location_path: Arc, db: Arc, + sync: Arc, ) -> Self { - Self::new(Kind::Exif, file_paths, location_id, location_path, db) + Self::new(Kind::Exif, file_paths, location_id, location_path, db, sync) } #[must_use] @@ -120,8 +127,16 @@ impl MediaDataExtractor { location_id: location::id::Type, location_path: Arc, db: Arc, + sync: Arc, ) -> Self { - Self::new(Kind::FFmpeg, file_paths, location_id, location_path, db) + Self::new( + Kind::FFmpeg, + file_paths, + location_id, + location_path, + db, + sync, + ) } } @@ -241,8 +256,14 @@ impl Task for MediaDataExtractor { ffmpeg_media_datas, } => { let db_write_start = Instant::now(); - self.output.extracted = - save(self.kind, exif_media_datas, ffmpeg_media_datas, &self.db).await?; + self.output.extracted = save( + self.kind, + exif_media_datas, + ffmpeg_media_datas, + &self.db, + &self.sync, + ) + .await?; self.output.db_write_time = 
db_write_start.elapsed(); self.output.skipped += self.output.errors.len() as u64; @@ -295,7 +316,7 @@ impl SerializableTask for MediaDataExtractor { type DeserializeError = rmp_serde::decode::Error; - type DeserializeCtx = Arc; + type DeserializeCtx = (Arc, Arc); async fn serialize(self) -> Result, Self::SerializeError> { let Self { @@ -322,7 +343,7 @@ impl SerializableTask for MediaDataExtractor { async fn deserialize( data: &[u8], - db: Self::DeserializeCtx, + (db, sync): Self::DeserializeCtx, ) -> Result { rmp_serde::from_slice(data).map( |SaveState { @@ -341,6 +362,7 @@ impl SerializableTask for MediaDataExtractor { location_path, stage, db, + sync, output, }, ) @@ -355,7 +377,7 @@ async fn fetch_objects_already_with_media_data( ) -> Result, media_processor::Error> { let object_ids = file_paths .iter() - .filter_map(|file_path| file_path.object_id) + .filter_map(|file_path| file_path.object.as_ref().map(|object| object.id)) .collect(); match kind { @@ -388,7 +410,7 @@ fn filter_files_to_extract_media_data( Output { skipped, errors, .. }: &mut Output, -) -> HashMap { +) -> HashMap { let unique_objects_already_with_media_data = objects_already_with_media_data .into_iter() .collect::>(); @@ -397,7 +419,7 @@ fn filter_files_to_extract_media_data( file_paths.retain(|file_path| { !unique_objects_already_with_media_data - .contains(&file_path.object_id.expect("already checked")) + .contains(&file_path.object.as_ref().expect("already checked").id) }); file_paths @@ -416,11 +438,14 @@ fn filter_files_to_extract_media_data( ); }) .map(|iso_file_path| { + let object = file_path.object.as_ref().expect("already checked"); + ( file_path.id, ( location_path.join(iso_file_path), - file_path.object_id.expect("already checked"), + object.id, + from_bytes_to_uuid(&object.pub_id), ), ) }) @@ -437,6 +462,7 @@ enum ExtractionOutputKind { struct ExtractionOutput { file_path_id: file_path::id::Type, object_id: object::id::Type, + object_pub_id: Uuid, kind: ExtractionOutputKind, } @@ -453,23 +479,28 @@ enum InterruptRace { #[inline] fn prepare_extraction_futures<'a>( kind: Kind, - paths_by_id: &'a HashMap, + paths_by_id: &'a HashMap, interrupter: &'a Interrupter, ) -> FutureGroup + 'a> { paths_by_id .iter() - .map(|(file_path_id, (path, object_id))| async move { - InterruptRace::Processed(ExtractionOutput { - file_path_id: *file_path_id, - object_id: *object_id, - kind: match kind { - Kind::Exif => ExtractionOutputKind::Exif(exif_media_data::extract(path).await), - Kind::FFmpeg => { - ExtractionOutputKind::FFmpeg(ffmpeg_media_data::extract(path).await) - } - }, - }) - }) + .map( + |(file_path_id, (path, object_id, object_pub_id))| async move { + InterruptRace::Processed(ExtractionOutput { + file_path_id: *file_path_id, + object_id: *object_id, + object_pub_id: *object_pub_id, + kind: match kind { + Kind::Exif => { + ExtractionOutputKind::Exif(exif_media_data::extract(path).await) + } + Kind::FFmpeg => { + ExtractionOutputKind::FFmpeg(ffmpeg_media_data::extract(path).await) + } + }, + }) + }, + ) .map(|fut| { ( fut, @@ -485,16 +516,17 @@ fn process_output( ExtractionOutput { file_path_id, object_id, + object_pub_id, kind, }: ExtractionOutput, - exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type)>, + exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, Uuid)>, ffmpeg_media_datas: &mut Vec<(FFmpegMetadata, object::id::Type)>, extract_ids_to_remove_from_map: &mut Vec, output: &mut Output, ) { match kind { ExtractionOutputKind::Exif(Ok(Some(exif_data))) => { - exif_media_datas.push((exif_data, 
object_id)); + exif_media_datas.push((exif_data, object_id, object_pub_id)); } ExtractionOutputKind::Exif(Ok(None)) => { // No exif media data found @@ -514,12 +546,14 @@ fn process_output( #[inline] async fn save( kind: Kind, - exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type)>, + exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, Uuid)>, ffmpeg_media_datas: &mut Vec<(FFmpegMetadata, object::id::Type)>, db: &PrismaClient, + sync: &SyncManager, ) -> Result { match kind { - Kind::Exif => exif_media_data::save(mem::take(exif_media_datas), db).await, + Kind::Exif => exif_media_data::save(mem::take(exif_media_datas), db, sync).await, Kind::FFmpeg => ffmpeg_media_data::save(mem::take(ffmpeg_media_datas), db).await, } + .map_err(Into::into) } diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index c04fb6c553e1..9e41eb14d1ff 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -12,8 +12,8 @@ use crate::{ media_processor::{ self, helpers::thumbnailer::{ - can_generate_thumbnail_for_document, can_generate_thumbnail_for_image, get_shard_hex, - EPHEMERAL_DIR, TARGET_PX, TARGET_QUALITY, THUMBNAIL_GENERATION_TIMEOUT, WEBP_EXTENSION, + generate_thumbnail, GenerateThumbnailArgs, GenerationStatus, + THUMBNAIL_GENERATION_TIMEOUT, }, ThumbKey, ThumbnailKind, }, @@ -23,59 +23,30 @@ use crate::{ use sd_core_file_path_helper::IsolatedFilePathData; use sd_core_prisma_helpers::file_path_for_media_processor; -use sd_file_ext::extensions::{DocumentExtension, ImageExtension}; -use sd_images::{format_image, scale_dimensions, ConvertibleExtension}; -use sd_media_metadata::exif::Orientation; use sd_prisma::prisma::{file_path, location}; use sd_task_system::{ ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId, }; -use sd_utils::error::FileIOError; use std::{ collections::HashMap, fmt, future::IntoFuture, mem, - ops::Deref, path::{Path, PathBuf}, pin::pin, - str::FromStr, sync::Arc, time::Duration, }; use futures::{FutureExt, StreamExt}; use futures_concurrency::future::{FutureGroup, Race}; -use image::{imageops, DynamicImage, GenericImageView}; + use serde::{Deserialize, Serialize}; use specta::Type; -use tokio::{ - fs, io, - task::spawn_blocking, - time::{sleep, Instant}, -}; -use tracing::{error, info, trace}; +use tokio::time::{sleep, Instant}; +use tracing::{error, info}; use uuid::Uuid; -use webp::Encoder; - -#[derive(Debug, Serialize, Deserialize)] -pub struct GenerateThumbnailArgs { - pub extension: String, - pub cas_id: String, - pub path: PathBuf, -} - -impl GenerateThumbnailArgs { - #[must_use] - pub const fn new(extension: String, cas_id: String, path: PathBuf) -> Self { - Self { - extension, - cas_id, - path, - } - } -} pub type ThumbnailId = u32; @@ -86,7 +57,7 @@ pub trait NewThumbnailReporter: Send + Sync + fmt::Debug + 'static { #[derive(Debug)] pub struct Thumbnailer { id: TaskId, - reporter: Arc, + reporter: Reporter, thumbs_kind: ThumbnailKind, thumbnails_directory_path: Arc, thumbnails_to_generate: HashMap, @@ -173,7 +144,7 @@ impl Task for Thumbnailer { InterruptRace::Processed(out) => process_thumbnail_generation_output( out, *with_priority, - reporter.as_ref(), + reporter, already_processed_ids, output, ), @@ -254,7 +225,7 @@ impl Thumbnailer { errors: Vec, should_regenerate: bool, with_priority: bool, - reporter: Arc, + reporter: 
Reporter, ) -> Self { Self { id: TaskId::new_v4(), @@ -276,7 +247,7 @@ impl Thumbnailer { pub fn new_ephemeral( thumbnails_directory_path: Arc, thumbnails_to_generate: Vec, - reporter: Arc, + reporter: Reporter, ) -> Self { Self::new( ThumbnailKind::Ephemeral, @@ -308,7 +279,7 @@ impl Thumbnailer { library_id: Uuid, should_regenerate: bool, with_priority: bool, - reporter: Arc, + reporter: Reporter, ) -> Self { let mut errors = Vec::new(); @@ -385,7 +356,7 @@ impl SerializableTask for Thumbnailer; + type DeserializeCtx = Reporter; async fn serialize(self) -> Result, Self::SerializeError> { let Self { @@ -443,11 +414,6 @@ impl SerializableTask for Thumbnailer ( - Duration, - Result<(ThumbKey, GenerationStatus), NonCriticalError>, -) { - trace!("Generating thumbnail for {}", path.display()); - let start = Instant::now(); - - let mut output_path = match kind { - ThumbnailKind::Ephemeral => thumbnails_directory.join(EPHEMERAL_DIR), - ThumbnailKind::Indexed(library_id) => thumbnails_directory.join(library_id.to_string()), - }; - - output_path.push(get_shard_hex(cas_id)); - output_path.push(cas_id); - output_path.set_extension(WEBP_EXTENSION); - - if let Err(e) = fs::metadata(&*output_path).await { - if e.kind() != io::ErrorKind::NotFound { - error!( - "Failed to check if thumbnail exists, but we will try to generate it anyway: {e:#?}" - ); - } - // Otherwise we good, thumbnail doesn't exist so we can generate it - } else if !should_regenerate { - trace!( - "Skipping thumbnail generation for {} because it already exists", - path.display() - ); - return ( - start.elapsed(), - Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Skipped)), - ); - } - - if let Ok(extension) = ImageExtension::from_str(extension) { - if can_generate_thumbnail_for_image(extension) { - if let Err(e) = generate_image_thumbnail(&path, &output_path).await { - return (start.elapsed(), Err(e)); - } - } - } else if let Ok(extension) = DocumentExtension::from_str(extension) { - if can_generate_thumbnail_for_document(extension) { - if let Err(e) = generate_image_thumbnail(&path, &output_path).await { - return (start.elapsed(), Err(e)); - } - } - } - - #[cfg(feature = "ffmpeg")] - { - use crate::media_processor::helpers::thumbnailer::can_generate_thumbnail_for_video; - use sd_file_ext::extensions::VideoExtension; - - if let Ok(extension) = VideoExtension::from_str(extension) { - if can_generate_thumbnail_for_video(extension) { - if let Err(e) = generate_video_thumbnail(&path, &output_path).await { - return (start.elapsed(), Err(e)); - } - } - } - } - - trace!("Generated thumbnail for {}", path.display()); - - ( - start.elapsed(), - Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Generated)), - ) -} - -async fn generate_image_thumbnail( - file_path: impl AsRef + Send, - output_path: impl AsRef + Send, -) -> Result<(), NonCriticalError> { - let file_path = file_path.as_ref().to_path_buf(); - - let webp = spawn_blocking({ - let file_path = file_path.clone(); - - move || -> Result<_, NonCriticalError> { - let mut img = format_image(&file_path) - .map_err(|e| NonCriticalError::FormatImage(file_path.clone(), e.to_string()))?; - - let (w, h) = img.dimensions(); - - #[allow(clippy::cast_precision_loss)] - let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX); - - // Optionally, resize the existing photo and convert back into DynamicImage - if w != w_scaled && h != h_scaled { - img = DynamicImage::ImageRgba8(imageops::resize( - &img, - w_scaled, - h_scaled, - imageops::FilterType::Triangle, - )); - } - - // this 
corrects the rotation/flip of the image based on the *available* exif data - // not all images have exif data, so we don't error. we also don't rotate HEIF as that's against the spec - if let Some(orientation) = Orientation::from_path(&file_path) { - if ConvertibleExtension::try_from(file_path.as_ref()) - .expect("we already checked if the image was convertible") - .should_rotate() - { - img = orientation.correct_thumbnail(img); - } - } - - // Create the WebP encoder for the above image - let encoder = Encoder::from_image(&img) - .map_err(|reason| NonCriticalError::WebPEncoding(file_path, reason.to_string()))?; - - // Type `WebPMemory` is !Send, which makes the `Future` in this function `!Send`, - // this make us `deref` to have a `&[u8]` and then `to_owned` to make a `Vec` - // which implies on a unwanted clone... - Ok(encoder.encode(TARGET_QUALITY).deref().to_owned()) - } - }) - .await - .map_err(|e| { - NonCriticalError::PanicWhileGeneratingThumbnail(file_path.clone(), e.to_string()) - })??; - - let output_path = output_path.as_ref(); - - if let Some(shard_dir) = output_path.parent() { - fs::create_dir_all(shard_dir).await.map_err(|e| { - NonCriticalError::CreateShardDirectory(FileIOError::from((shard_dir, e)).to_string()) - })?; - } else { - error!( - "Failed to get parent directory of '{}' for sharding parent directory", - output_path.display() - ); - } - - fs::write(output_path, &webp).await.map_err(|e| { - NonCriticalError::SaveThumbnail(file_path, FileIOError::from((output_path, e)).to_string()) - }) -} - -#[cfg(feature = "ffmpeg")] -async fn generate_video_thumbnail( - file_path: impl AsRef + Send, - output_path: impl AsRef + Send, -) -> Result<(), NonCriticalError> { - use sd_ffmpeg::{to_thumbnail, ThumbnailSize}; - - let file_path = file_path.as_ref(); - - to_thumbnail( - file_path, - output_path, - ThumbnailSize::Scale(1024), - TARGET_QUALITY, - ) - .await - .map_err(|e| { - NonCriticalError::VideoThumbnailGenerationFailed(file_path.to_path_buf(), e.to_string()) - }) -} diff --git a/core/crates/prisma-helpers/src/lib.rs b/core/crates/prisma-helpers/src/lib.rs index d562ab534952..c7c214417e01 100644 --- a/core/crates/prisma-helpers/src/lib.rs +++ b/core/crates/prisma-helpers/src/lib.rs @@ -62,7 +62,10 @@ file_path::select!(file_path_for_media_processor { name extension cas_id - object_id + object: select { + id + pub_id + } }); file_path::select!(file_path_to_isolate { location_id diff --git a/core/src/api/ephemeral_files.rs b/core/src/api/ephemeral_files.rs index f16982bcbacf..2a9c430ab73d 100644 --- a/core/src/api/ephemeral_files.rs +++ b/core/src/api/ephemeral_files.rs @@ -7,11 +7,13 @@ use crate::{ library::Library, object::{ fs::{error::FileSystemJobsError, find_available_filename_for_duplicate}, - media::exif_metadata_extractor::{can_extract_exif_data_for_image, extract_exif_data}, + // media::exif_metadata_extractor::{can_extract_exif_data_for_image, extract_exif_data}, }, }; use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_heavy_lifting::media_processor::exif_media_data; + use sd_file_ext::{ extensions::{Extension, ImageExtension}, kind::ObjectKind, @@ -71,11 +73,11 @@ pub(crate) fn mount() -> AlphaRouter { ) })?; - if !can_extract_exif_data_for_image(&image_extension) { + if !exif_media_data::can_extract(image_extension) { return Ok(None); } - let exif_data = extract_exif_data(full_path) + let exif_data = exif_media_data::extract(full_path) .await .map_err(|e| { rspc::Error::with_cause( diff --git a/core/src/api/files.rs b/core/src/api/files.rs index 
d83d1e2bd073..c7714b890b97 100644 --- a/core/src/api/files.rs +++ b/core/src/api/files.rs @@ -9,12 +9,13 @@ use crate::{ old_copy::OldFileCopierJobInit, old_cut::OldFileCutterJobInit, old_delete::OldFileDeleterJobInit, old_erase::OldFileEraserJobInit, }, - media::{exif_media_data_from_prisma_data, ffmpeg_data_from_prisma_data}, + // media::{exif_media_data_from_prisma_data, ffmpeg_data_from_prisma_data}, }, - old_job::Job, + old_job::OldJob, }; use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData}; +use sd_core_heavy_lifting::media_processor::{exif_media_data, ffmpeg_media_data}; use sd_core_prisma_helpers::{ file_path_to_isolate, file_path_to_isolate_with_id, object_with_file_paths, object_with_media_data, @@ -127,13 +128,13 @@ pub(crate) fn mount() -> AlphaRouter { .and_then(|obj| { Some(match obj.kind { Some(v) if v == ObjectKind::Image as i32 => MediaData::Exif( - exif_media_data_from_prisma_data(obj.exif_data?), + exif_media_data::from_prisma_data(obj.exif_data?), ), Some(v) if v == ObjectKind::Audio as i32 || v == ObjectKind::Video as i32 => { - MediaData::FFmpeg(ffmpeg_data_from_prisma_data( + MediaData::FFmpeg(ffmpeg_media_data::from_prisma_data( obj.ffmpeg_data?, )) } @@ -495,7 +496,7 @@ pub(crate) fn mount() -> AlphaRouter { } } } - _ => Job::new(args) + _ => OldJob::new(args) .spawn(&node, &library) .await .map_err(Into::into), @@ -550,7 +551,7 @@ pub(crate) fn mount() -> AlphaRouter { Ok(()) } - _ => Job::new(args) + _ => OldJob::new(args) .spawn(&node, &library) .await .map_err(Into::into), @@ -696,7 +697,7 @@ pub(crate) fn mount() -> AlphaRouter { .procedure("eraseFiles", { R.with2(library()) .mutation(|(node, library), args: OldFileEraserJobInit| async move { - Job::new(args) + OldJob::new(args) .spawn(&node, &library) .await .map_err(Into::into) @@ -705,7 +706,7 @@ pub(crate) fn mount() -> AlphaRouter { .procedure("copyFiles", { R.with2(library()) .mutation(|(node, library), args: OldFileCopierJobInit| async move { - Job::new(args) + OldJob::new(args) .spawn(&node, &library) .await .map_err(Into::into) @@ -714,7 +715,7 @@ pub(crate) fn mount() -> AlphaRouter { .procedure("cutFiles", { R.with2(library()) .mutation(|(node, library), args: OldFileCutterJobInit| async move { - Job::new(args) + OldJob::new(args) .spawn(&node, &library) .await .map_err(Into::into) diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 6c3f57d6b57d..666c34f5d834 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -1,12 +1,8 @@ use crate::{ invalidate_query, location::{find_location, LocationError}, - object::{ - media::OldMediaProcessorJobInit, - old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, - validation::old_validator_job::OldObjectValidatorJobInit, - }, - old_job::{Job, JobReport, JobStatus, OldJobs}, + object::validation::old_validator_job::OldObjectValidatorJobInit, + old_job::{JobReport, JobStatus, OldJob, OldJobs}, }; use sd_core_prisma_helpers::job_without_data; @@ -250,7 +246,7 @@ pub(crate) fn mount() -> AlphaRouter { return Err(LocationError::IdNotFound(id).into()); }; - Job::new(OldMediaProcessorJobInit { + OldJob::new(OldMediaProcessorJobInit { location, sub_path: Some(path), regenerate_thumbnails: regenerate, @@ -282,7 +278,7 @@ pub(crate) fn mount() -> AlphaRouter { return Err(LocationError::IdNotFound(id).into()); }; - Job::new(OldMediaProcessorJobInit { + OldJob::new(OldMediaProcessorJobInit { location, sub_path: Some(path), regenerate_thumbnails: false, @@ -307,7 +303,7 @@ pub(crate) fn mount() -> AlphaRouter { return 
Err(LocationError::IdNotFound(args.id).into()); }; - Job::new(OldObjectValidatorJobInit { + OldJob::new(OldObjectValidatorJobInit { location, sub_path: Some(args.path), }) @@ -329,7 +325,7 @@ pub(crate) fn mount() -> AlphaRouter { return Err(LocationError::IdNotFound(args.id).into()); }; - Job::new(OldFileIdentifierJobInit { + OldJob::new(OldFileIdentifierJobInit { location, sub_path: Some(args.path), }) diff --git a/core/src/api/labels.rs b/core/src/api/labels.rs index 0e5249c592fa..18ee29a96b6b 100644 --- a/core/src/api/labels.rs +++ b/core/src/api/labels.rs @@ -1,7 +1,6 @@ -use crate::{ - invalidate_query, library::Library, object::media::old_thumbnail::get_indexed_thumb_key, -}; +use crate::{invalidate_query, library::Library}; +use sd_core_heavy_lifting::media_processor::ThumbKey; use sd_core_prisma_helpers::label_with_objects; use sd_prisma::{ @@ -49,7 +48,7 @@ pub(crate) fn mount() -> AlphaRouter { file_path_data .cas_id .as_ref() - .map(|cas_id| get_indexed_thumb_key(cas_id, library.id)) + .map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)) }) // Filter out None values and transform each element to Vec> .collect::>(), // Collect into Vec>> }) diff --git a/core/src/api/locations.rs b/core/src/api/locations.rs index 24d56e52b699..5d9a9832441e 100644 --- a/core/src/api/locations.rs +++ b/core/src/api/locations.rs @@ -1,16 +1,16 @@ use crate::{ invalidate_query, location::{ - delete_location, find_location, indexer::OldIndexerJobInit, light_scan_location, - non_indexed::NonIndexedPathItem, relink_location, scan_location, scan_location_sub_path, - LocationCreateArgs, LocationError, LocationUpdateArgs, ScanState, + delete_location, find_location, light_scan_location, non_indexed::NonIndexedPathItem, + relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError, + LocationUpdateArgs, ScanState, }, - object::old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, old_job::StatefulJob, p2p::PeerMetadata, util::AbortOnDrop, }; +use sd_core_heavy_lifting::media_processor::ThumbKey; use sd_core_indexer_rules::IndexerRuleCreateArgs; use sd_core_prisma_helpers::{ file_path_for_frontend, label_with_objects, location_with_indexer_rules, object_with_file_paths, @@ -29,28 +29,24 @@ use tracing::{debug, error}; use super::{utils::library, Ctx, R}; -// it includes the shard hex formatted as ([["f02", "cab34a76fbf3469f"]]) -// Will be None if no thumbnail exists -pub type ThumbnailKey = Vec; - #[derive(Serialize, Type, Debug)] #[serde(tag = "type")] pub enum ExplorerItem { Path { // provide the frontend with the thumbnail key explicitly - thumbnail: Option, + thumbnail: Option, // this tells the frontend if a thumbnail actually exists or not has_created_thumbnail: bool, // we can't actually modify data from PCR types, thats why computed properties are used on ExplorerItem item: Box, }, Object { - thumbnail: Option, + thumbnail: Option, has_created_thumbnail: bool, item: object_with_file_paths::Data, }, NonIndexedPath { - thumbnail: Option, + thumbnail: Option, has_created_thumbnail: bool, item: NonIndexedPathItem, }, @@ -61,7 +57,7 @@ pub enum ExplorerItem { item: PeerMetadata, }, Label { - thumbnails: Vec, + thumbnails: Vec, item: label_with_objects::Data, }, } diff --git a/core/src/api/mod.rs b/core/src/api/mod.rs index 3ad1625e1249..cf7bb9427f97 100644 --- a/core/src/api/mod.rs +++ b/core/src/api/mod.rs @@ -8,6 +8,7 @@ use crate::{ Node, }; +use sd_core_heavy_lifting::media_processor::ThumbKey; use sd_p2p::RemoteIdentity; use 
sd_prisma::prisma::file_path; @@ -54,7 +55,7 @@ pub type Router = rspc::Router; #[derive(Debug, Clone, Serialize, Type)] pub enum CoreEvent { NewThumbnail { - thumb_key: Vec, + thumb_key: ThumbKey, }, NewIdentifiedObjects { file_path_ids: Vec, diff --git a/core/src/api/nodes.rs b/core/src/api/nodes.rs index b5be20f3ded5..e477ec7f830e 100644 --- a/core/src/api/nodes.rs +++ b/core/src/api/nodes.rs @@ -182,11 +182,12 @@ pub(crate) fn mount() -> AlphaRouter { }: UpdateThumbnailerPreferences| async move { node.config .update_preferences(|preferences| { - preferences - .thumbnailer - .set_background_processing_percentage( - background_processing_percentage, - ); + // TODO(fogodev): remove this crap + // preferences + // .thumbnailer + // .set_background_processing_percentage( + // background_processing_percentage, + // ); }) .await .map_err(|e| { diff --git a/core/src/api/search/mod.rs b/core/src/api/search/mod.rs index a17d97997ebd..d9badd0e5164 100644 --- a/core/src/api/search/mod.rs +++ b/core/src/api/search/mod.rs @@ -2,11 +2,11 @@ use crate::{ api::{locations::ExplorerItem, utils::library}, library::Library, location::{non_indexed, LocationError}, - object::media::old_thumbnail::get_indexed_thumb_key, util::{unsafe_streamed_query, BatchedStream}, }; use prisma_client_rust::Operator; +use sd_core_heavy_lifting::media_processor::ThumbKey; use sd_core_prisma_helpers::{file_path_for_frontend, object_with_file_paths}; use sd_prisma::prisma::{self, PrismaClient}; @@ -231,7 +231,7 @@ pub fn mount() -> AlphaRouter { .cas_id .as_ref() // .filter(|_| thumbnail_exists_locally) - .map(|i| get_indexed_thumb_key(i, library.id)), + .map(|i| ThumbKey::new_indexed(i, library.id)), has_created_thumbnail, item: Box::new(file_path), }) diff --git a/core/src/api/utils/invalidate.rs b/core/src/api/utils/invalidate.rs index bce8aecaf28e..35b3da62a9e6 100644 --- a/core/src/api/utils/invalidate.rs +++ b/core/src/api/utils/invalidate.rs @@ -135,6 +135,19 @@ impl InvalidRequests { #[macro_export] // #[allow(clippy::crate_in_macro_def)] macro_rules! invalidate_query { + + ($ctx:expr, $query:ident) => {{ + let ctx: &$crate::library::Library = &$ctx; // Assert the context is the correct type + let query: &'static str = $query; + + ::tracing::trace!(target: "sd_core::invalidate-query", "invalidate_query!(\"{}\") at {}", query, concat!(file!(), ":", line!())); + + // The error are ignored here because they aren't mission critical. If they fail the UI might be outdated for a bit. 
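+		// Hypothetical call sites for this new arm (illustrative, not taken from this patch): the
+		// ident must already be bound to a `&'static str`, e.g.
+		// `let query = "search.paths"; invalidate_query!(library, query);`, while the pre-existing
+		// literal arm below keeps working as `invalidate_query!(library, "search.paths")`.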
+ ctx.emit($crate::api::CoreEvent::InvalidateOperation( + $crate::api::utils::InvalidateOperationEvent::dangerously_create(query, serde_json::Value::Null, None) + )) + }}; + ($ctx:expr, $key:literal) => {{ let ctx: &$crate::library::Library = &$ctx; // Assert the context is the correct type diff --git a/core/src/context.rs b/core/src/context.rs new file mode 100644 index 000000000000..03c05e52c056 --- /dev/null +++ b/core/src/context.rs @@ -0,0 +1,205 @@ +use crate::{api::CoreEvent, invalidate_query, library::Library, old_job::JobProgressEvent, Node}; + +use sd_core_heavy_lifting::{ + job_system::report::{Report, Status}, + OuterContext, ProgressUpdate, UpdateEvent, +}; +use tracing::trace; + +use std::{ + ops::{Deref, DerefMut}, + sync::Arc, +}; + +use chrono::{DateTime, Utc}; +use uuid::Uuid; + +use tokio::sync::RwLock; + +#[derive(Clone)] +pub struct NodeContext { + pub node: Arc, + pub library: Arc, +} + +mod sealed { + use crate::{library::Library, Node}; + + use std::sync::Arc; + + pub(super) trait Sealed { + fn library(&self) -> &Arc; + fn node(&self) -> &Arc; + } +} + +impl sealed::Sealed for NodeContext { + fn library(&self) -> &Arc { + &self.library + } + + fn node(&self) -> &Arc { + &self.node + } +} + +impl OuterContext for NodeContext { + fn id(&self) -> Uuid { + self.library.id + } + + fn db(&self) -> &Arc { + &self.library.db + } + + fn sync(&self) -> &Arc { + &self.library.sync + } + + fn invalidate_query(&self, query: &'static str) { + invalidate_query!(self.library, query) + } + + fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync { + |query| { + invalidate_query!(self.library, query); + } + } + + fn report_update(&self, update: UpdateEvent) { + // FIX-ME: Remove this conversion once we have a proper atomic updates system + let event = match update { + UpdateEvent::NewThumbnail { thumb_key } => CoreEvent::NewThumbnail { thumb_key }, + UpdateEvent::NewIdentifiedObjects { file_path_ids } => { + CoreEvent::NewIdentifiedObjects { file_path_ids } + } + }; + self.node.emit(event); + } + + fn get_data_directory(&self) -> &std::path::Path { + &self.node.data_dir + } +} + +#[derive(Clone)] +pub struct JobContext { + outer_ctx: OuterCtx, + report: Arc>, + start_time: DateTime, +} + +impl OuterContext for JobContext { + fn id(&self) -> Uuid { + self.outer_ctx.id() + } + + fn db(&self) -> &Arc { + self.outer_ctx.db() + } + + fn sync(&self) -> &Arc { + self.outer_ctx.sync() + } + + fn invalidate_query(&self, query: &'static str) { + self.outer_ctx.invalidate_query(query); + } + + fn query_invalidator(&self) -> impl Fn(&'static str) + Send + Sync { + self.outer_ctx.query_invalidator() + } + + fn report_update(&self, update: UpdateEvent) { + self.outer_ctx.report_update(update); + } + + fn get_data_directory(&self) -> &std::path::Path { + self.outer_ctx.get_data_directory() + } +} + +impl sd_core_heavy_lifting::JobContext + for JobContext +{ + fn new(report: Report, outer_ctx: OuterCtx) -> Self { + Self { + report: Arc::new(RwLock::new(report)), + outer_ctx, + start_time: Utc::now(), + } + } + + async fn progress(&self, updates: Vec) { + let mut report = self.report.write().await; + + // protect against updates if job is not running + if report.status != Status::Running { + return; + }; + + for update in updates { + match update { + ProgressUpdate::TaskCount(task_count) => { + report.task_count = task_count as i32; + } + ProgressUpdate::CompletedTaskCount(completed_task_count) => { + report.completed_task_count = completed_task_count as i32; + } + + 
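+			// These arms mirror the job-side calls added elsewhere in this patch, e.g.
+			// `ctx.progress(vec![ProgressUpdate::TaskCount(n), ProgressUpdate::Phase(phase)]).await`
+			// in the media processor job (`n` and `phase` are illustrative); each variant only
+			// updates the in-memory `report`, with a `trace!` log for `Message` and `Phase`.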
ProgressUpdate::Message(message) => { + trace!("job {} message: {}", report.id, message); + report.message = message; + } + ProgressUpdate::Phase(phase) => { + trace!( + "changing Job phase: {} -> {phase}", + report.id, + report.phase + ); + report.phase = phase; + } + } + } + + // Calculate elapsed time + let elapsed = Utc::now() - self.start_time; + + // Calculate remaining time + let task_count = report.task_count as usize; + let completed_task_count = report.completed_task_count as usize; + let remaining_task_count = task_count.saturating_sub(completed_task_count); + let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32; // Adding 1 to avoid division by zero + let remaining_time = remaining_time_per_task * remaining_task_count as i32; + + // Update the report with estimated remaining time + report.estimated_completion = Utc::now() + .checked_add_signed(remaining_time) + .unwrap_or(Utc::now()); + + let library = self.outer_ctx.library(); + + // emit a CoreEvent + library.emit(CoreEvent::JobProgress(JobProgressEvent { + id: report.id, + library_id: library.id, + task_count: report.task_count, + completed_task_count: report.completed_task_count, + estimated_completion: report.estimated_completion, + phase: report.phase.clone(), + message: report.message.clone(), + })); + } + + async fn report(&self) -> impl Deref { + Arc::clone(&self.report).read_owned().await + } + + async fn report_mut(&self) -> impl DerefMut { + Arc::clone(&self.report).write_owned().await + } + + fn get_outer_ctx(&self) -> OuterCtx { + self.outer_ctx.clone() + } +} diff --git a/core/src/custom_uri/mod.rs b/core/src/custom_uri/mod.rs index 37ad328f424b..5b376d01f7b0 100644 --- a/core/src/custom_uri/mod.rs +++ b/core/src/custom_uri/mod.rs @@ -1,13 +1,13 @@ use crate::{ api::{utils::InvalidateOperationEvent, CoreEvent}, library::Library, - object::media::old_thumbnail::WEBP_EXTENSION, p2p::operations, util::InfallibleResponse, Node, }; use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_heavy_lifting::media_processor::WEBP_EXTENSION; use sd_core_prisma_helpers::file_path_to_handle_custom_uri; use sd_file_ext::text::is_text; diff --git a/core/src/lib.rs b/core/src/lib.rs index 037f57e1a4d2..cb55c4b96f16 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -4,11 +4,15 @@ use crate::{ api::{CoreEvent, Router}, location::LocationManagerError, - object::media::old_thumbnail::old_actor::OldThumbnailer, + // object::media::old_thumbnail::old_actor::OldThumbnailer, }; +use sd_core_heavy_lifting::JobSystem; + #[cfg(feature = "ai")] use sd_ai::old_image_labeler::{DownloadModelError, OldImageLabeler, YoloV8}; + +use sd_task_system::TaskSystem; use sd_utils::error::FileIOError; use api::notifications::{Notification, NotificationData, NotificationId}; @@ -18,6 +22,7 @@ use notifications::Notifications; use reqwest::{RequestBuilder, Response}; use std::{ + collections::HashMap, fmt, path::{Path, PathBuf}, sync::{atomic::AtomicBool, Arc}, @@ -34,6 +39,7 @@ use tracing_subscriber::{filter::FromEnvError, prelude::*, EnvFilter}; pub mod api; mod cloud; +mod context; #[cfg(feature = "crypto")] pub(crate) mod crypto; pub mod custom_uri; @@ -52,7 +58,8 @@ pub(crate) mod volume; pub use env::Env; -use object::media::old_thumbnail::get_ephemeral_thumbnail_path; +use context::{JobContext, NodeContext}; +use object::media::get_ephemeral_thumbnail_path; pub(crate) use sd_core_sync as sync; @@ -67,10 +74,12 @@ pub struct Node { pub p2p: Arc, pub event_bus: (broadcast::Sender, broadcast::Receiver), pub 
notifications: Notifications, - pub thumbnailer: OldThumbnailer, + // pub thumbnailer: OldThumbnailer, pub cloud_sync_flag: Arc, pub env: Arc, pub http: reqwest::Client, + pub task_system: TaskSystem, + pub job_system: JobSystem>, #[cfg(feature = "ai")] pub old_image_labeller: Option, } @@ -119,22 +128,26 @@ impl Node { let (old_jobs, jobs_actor) = old_job::OldJobs::new(); let libraries = library::Libraries::new(data_dir.join("libraries")).await?; + let task_system = TaskSystem::new(); + let (p2p, start_p2p) = p2p::P2PManager::new(config.clone(), libraries.clone()) .await .map_err(NodeError::P2PManager)?; let node = Arc::new(Node { data_dir: data_dir.to_path_buf(), + job_system: JobSystem::new(task_system.get_dispatcher(), data_dir), + task_system, old_jobs, locations, notifications: notifications::Notifications::new(), p2p, - thumbnailer: OldThumbnailer::new( - data_dir, - libraries.clone(), - event_bus.0.clone(), - config.preferences_watcher(), - ) - .await, + // thumbnailer: OldThumbnailer::new( + // data_dir, + // libraries.clone(), + // event_bus.0.clone(), + // config.preferences_watcher(), + // ) + // .await, config, event_bus, libraries, @@ -170,6 +183,27 @@ impl Node { locations_actor.start(node.clone()); node.libraries.init(&node).await?; jobs_actor.start(node.clone()); + + node.job_system + .init( + &node + .libraries + .get_all() + .await + .into_iter() + .map(|library| { + ( + library.id, + NodeContext { + library, + node: Arc::clone(&node), + }, + ) + }) + .collect(), + ) + .await?; + start_p2p( node.clone(), axum::Router::new() @@ -255,7 +289,7 @@ impl Node { pub async fn shutdown(&self) { info!("Spacedrive shutting down..."); - self.thumbnailer.shutdown().await; + // self.thumbnailer.shutdown().await; self.old_jobs.shutdown().await; self.p2p.shutdown().await; #[cfg(feature = "ai")] @@ -371,6 +405,9 @@ pub enum NodeError { InitConfig(#[from] util::debug_initializer::InitConfigError), #[error("logger error: {0}")] Logger(#[from] FromEnvError), + #[error(transparent)] + JobSystem(#[from] sd_core_heavy_lifting::JobSystemError), + #[cfg(feature = "ai")] #[error("ai error: {0}")] AI(#[from] sd_ai::Error), diff --git a/core/src/library/library.rs b/core/src/library/library.rs index 57ad5ef046eb..841b9f26e1e5 100644 --- a/core/src/library/library.rs +++ b/core/src/library/library.rs @@ -1,6 +1,4 @@ -use crate::{ - api::CoreEvent, cloud, object::media::old_thumbnail::get_indexed_thumbnail_path, sync, Node, -}; +use crate::{api::CoreEvent, cloud, object::media::get_indexed_thumbnail_path, sync, Node}; use sd_core_file_path_helper::IsolatedFilePathData; use sd_core_prisma_helpers::file_path_to_full_path; diff --git a/core/src/location/indexer/mod.rs b/core/src/location/indexer/mod.rs index ef1dff558160..a469cec98cfd 100644 --- a/core/src/location/indexer/mod.rs +++ b/core/src/location/indexer/mod.rs @@ -30,8 +30,8 @@ mod old_walk; use old_walk::WalkedEntry; -pub use old_indexer_job::OldIndexerJobInit; -pub use old_shallow::*; +// pub use old_indexer_job::OldIndexerJobInit; +// pub use old_shallow::*; #[derive(Serialize, Deserialize, Debug)] pub struct OldIndexerJobSaveStep { diff --git a/core/src/location/indexer/old_indexer_job.rs b/core/src/location/indexer/old_indexer_job.rs index 552a62867aeb..b86d565ae569 100644 --- a/core/src/location/indexer/old_indexer_job.rs +++ b/core/src/location/indexer/old_indexer_job.rs @@ -1,660 +1,660 @@ -use crate::{ - file_paths_db_fetcher_fn, invalidate_query, - library::Library, - location::{location_with_indexer_rules, update_location_size, 
ScanState}, - old_job::{ - CurrentStep, JobError, JobInitOutput, JobReportUpdate, JobResult, JobRunMetadata, - JobStepOutput, StatefulJob, WorkerContext, - }, - to_remove_db_fetcher_fn, -}; - -use sd_core_file_path_helper::{ - ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - IsolatedFilePathData, -}; -use sd_core_indexer_rules::IndexerRule; - -use sd_prisma::{ - prisma::{file_path, location}, - prisma_sync, -}; -use sd_sync::*; -use sd_utils::{db::maybe_missing, from_bytes_to_uuid, msgpack}; - -use std::{ - collections::HashMap, - hash::{Hash, Hasher}, - path::{Path, PathBuf}, - sync::Arc, - time::Duration, -}; - -use itertools::Itertools; -use prisma_client_rust::operator::or; -use serde::{Deserialize, Serialize}; -use serde_json::json; -use tokio::time::Instant; -use tracing::{debug, info, warn}; - -use super::{ - execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory, - old_walk::{keep_walking, walk, ToWalkEntry, WalkResult}, - remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError, - OldIndexerJobSaveStep, OldIndexerJobUpdateStep, -}; - -/// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. -const BATCH_SIZE: usize = 1000; - -/// `IndexerJobInit` receives a `location::Data` object to be indexed -/// and possibly a `sub_path` to be indexed. The `sub_path` is used when -/// we want do index just a part of a location. -#[derive(Serialize, Deserialize, Debug)] -pub struct OldIndexerJobInit { - pub location: location_with_indexer_rules::Data, - pub sub_path: Option, -} - -impl Hash for OldIndexerJobInit { - fn hash(&self, state: &mut H) { - self.location.id.hash(state); - if let Some(ref sub_path) = self.sub_path { - sub_path.hash(state); - } - } -} - -/// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that -/// is cached and casted on `PathBuf` from `local_path` column in the `location` table. It also -/// contains some metadata for logging purposes. 
-#[derive(Serialize, Deserialize, Debug)] -pub struct OldIndexerJobData { - location_path: PathBuf, - indexed_path: PathBuf, - indexer_rules: Vec, -} - -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct OldIndexerJobRunMetadata { - db_write_time: Duration, - scan_read_time: Duration, - total_paths: u64, - total_updated_paths: u64, - total_save_steps: u64, - total_update_steps: u64, - indexed_count: u64, - updated_count: u64, - removed_count: u64, - paths_and_sizes: HashMap, -} - -impl JobRunMetadata for OldIndexerJobRunMetadata { - fn update(&mut self, new_data: Self) { - self.db_write_time += new_data.db_write_time; - self.scan_read_time += new_data.scan_read_time; - self.total_paths += new_data.total_paths; - self.total_updated_paths += new_data.total_updated_paths; - self.total_save_steps += new_data.total_save_steps; - self.total_update_steps += new_data.total_update_steps; - self.indexed_count += new_data.indexed_count; - self.removed_count += new_data.removed_count; - - for (path, size) in new_data.paths_and_sizes { - *self.paths_and_sizes.entry(path).or_default() += size; - } - } -} - -#[derive(Clone)] -pub enum ScanProgress { - ChunkCount(usize), - SavedChunks(usize), - UpdatedChunks(usize), - Message(String), -} - -impl OldIndexerJobData { - fn on_scan_progress(ctx: &WorkerContext, progress: Vec) { - ctx.progress( - progress - .into_iter() - .map(|p| match p { - ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c), - ScanProgress::SavedChunks(p) | ScanProgress::UpdatedChunks(p) => { - JobReportUpdate::CompletedTaskCount(p) - } - ScanProgress::Message(m) => JobReportUpdate::Message(m), - }) - .collect(), - ) - } -} - -/// `IndexerJobStepInput` defines the action that should be executed in the current step -#[derive(Serialize, Deserialize, Debug)] -pub enum OldIndexerJobStepInput { - Save(OldIndexerJobSaveStep), - Walk(ToWalkEntry), - Update(OldIndexerJobUpdateStep), -} - -/// A `IndexerJob` is a stateful job that walks a directory and indexes all files. -/// First it walks the directory and generates a list of files to index, chunked into -/// batches of [`BATCH_SIZE`]. Then for each chunk it write the file metadata to the database. -#[async_trait::async_trait] -impl StatefulJob for OldIndexerJobInit { - type Data = OldIndexerJobData; - type Step = OldIndexerJobStepInput; - type RunMetadata = OldIndexerJobRunMetadata; - - const NAME: &'static str = "indexer"; - const IS_BATCHED: bool = true; - - fn target_location(&self) -> location::id::Type { - self.location.id - } - - /// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`. 
- async fn init( - &self, - ctx: &WorkerContext, - data: &mut Option, - ) -> Result, JobError> { - let init = self; - let location_id = init.location.id; - let location_path = maybe_missing(&init.location.path, "location.path").map(Path::new)?; - - let db = Arc::clone(&ctx.library.db); - let sync = &ctx.library.sync; - - let indexer_rules = init - .location - .indexer_rules - .iter() - .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) - .collect::, _>>() - .map_err(IndexerError::from)?; - - let to_walk_path = match &init.sub_path { - Some(sub_path) if sub_path != Path::new("") => { - let full_path = ensure_sub_path_is_in_location(location_path, sub_path) - .await - .map_err(IndexerError::from)?; - ensure_sub_path_is_directory(location_path, sub_path) - .await - .map_err(IndexerError::from)?; - - ensure_file_path_exists( - sub_path, - &IsolatedFilePathData::new(location_id, location_path, &full_path, true) - .map_err(IndexerError::from)?, - &db, - IndexerError::SubPathNotFound, - ) - .await?; - - full_path - } - _ => location_path.to_path_buf(), - }; - - let scan_start = Instant::now(); - let WalkResult { - walked, - to_update, - to_walk, - to_remove, - errors, - paths_and_sizes, - } = walk( - &location_path, - &to_walk_path, - &indexer_rules, - update_notifier_fn(ctx), - file_paths_db_fetcher_fn!(&db), - to_remove_db_fetcher_fn!(location_id, &db), - iso_file_path_factory(location_id, location_path), - 50_000, - ) - .await?; - let scan_read_time = scan_start.elapsed(); - let to_remove = to_remove.collect::>(); - - debug!( - "Walker at indexer job found {} file_paths to be removed", - to_remove.len() - ); - - ctx.node - .thumbnailer - .remove_indexed_cas_ids( - to_remove - .iter() - .filter_map(|file_path| file_path.cas_id.clone()) - .collect::>(), - ctx.library.id, - ) - .await; - - let db_delete_start = Instant::now(); - // TODO pass these uuids to sync system - let removed_count = remove_non_existing_file_paths(to_remove, &db, sync).await?; - let db_delete_time = db_delete_start.elapsed(); - - let total_new_paths = &mut 0; - let total_updated_paths = &mut 0; - let to_walk_count = to_walk.len(); - let to_save_chunks = &mut 0; - let to_update_chunks = &mut 0; - - let steps = walked - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - .map(|(i, chunk)| { - let chunk_steps = chunk.collect::>(); - - *total_new_paths += chunk_steps.len() as u64; - *to_save_chunks += 1; - - OldIndexerJobStepInput::Save(OldIndexerJobSaveStep { - chunk_idx: i, - walked: chunk_steps, - }) - }) - .chain( - to_update - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - .map(|(i, chunk)| { - let chunk_updates = chunk.collect::>(); - - *total_updated_paths += chunk_updates.len() as u64; - *to_update_chunks += 1; - - OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep { - chunk_idx: i, - to_update: chunk_updates, - }) - }), - ) - .chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk)) - .collect::>(); - - debug!("Walker at indexer job found {total_updated_paths} file_paths to be updated"); - - OldIndexerJobData::on_scan_progress( - ctx, - vec![ - ScanProgress::ChunkCount(*to_save_chunks + *to_update_chunks), - ScanProgress::Message(format!( - "Starting saving {total_new_paths} files or directories, \ - {total_updated_paths} files or directories to update, \ - there still {to_walk_count} directories to index", - )), - ], - ); - - *data = Some(OldIndexerJobData { - location_path: location_path.to_path_buf(), - indexed_path: to_walk_path, - indexer_rules, - }); - - Ok(( - OldIndexerJobRunMetadata { - 
db_write_time: db_delete_time, - scan_read_time, - total_paths: *total_new_paths, - total_updated_paths: *total_updated_paths, - indexed_count: 0, - updated_count: 0, - removed_count, - total_save_steps: *to_save_chunks as u64, - total_update_steps: *to_update_chunks as u64, - paths_and_sizes, - }, - steps, - errors - .into_iter() - .map(|e| format!("{e}")) - .collect::>() - .into(), - ) - .into()) - } - - /// Process each chunk of entries in the indexer job, writing to the `file_path` table - async fn execute_step( - &self, - ctx: &WorkerContext, - CurrentStep { step, .. }: CurrentStep<'_, Self::Step>, - data: &Self::Data, - run_metadata: &Self::RunMetadata, - ) -> Result, JobError> { - let init = self; - let mut new_metadata = Self::RunMetadata::default(); - match step { - OldIndexerJobStepInput::Save(step) => { - let start_time = Instant::now(); - - OldIndexerJobData::on_scan_progress( - ctx, - vec![ - ScanProgress::SavedChunks(step.chunk_idx + 1), - ScanProgress::Message(format!( - "Writing chunk {} of {} to database", - step.chunk_idx, run_metadata.total_save_steps - )), - ], - ); - - let count = execute_indexer_save_step(&init.location, step, &ctx.library).await?; - - new_metadata.indexed_count = count as u64; - new_metadata.db_write_time = start_time.elapsed(); - - Ok(new_metadata.into()) - } - OldIndexerJobStepInput::Update(to_update) => { - let start_time = Instant::now(); - OldIndexerJobData::on_scan_progress( - ctx, - vec![ - ScanProgress::UpdatedChunks(to_update.chunk_idx + 1), - ScanProgress::Message(format!( - "Updating chunk {} of {} to database", - to_update.chunk_idx, run_metadata.total_save_steps - )), - ], - ); - - let count = execute_indexer_update_step(to_update, &ctx.library).await?; - - new_metadata.updated_count = count as u64; - new_metadata.db_write_time = start_time.elapsed(); - - Ok(new_metadata.into()) - } - - OldIndexerJobStepInput::Walk(to_walk_entry) => { - let location_id = init.location.id; - let location_path = - maybe_missing(&init.location.path, "location.path").map(Path::new)?; - - let db = Arc::clone(&ctx.library.db); - let sync = &ctx.library.sync; - - let scan_start = Instant::now(); - - let WalkResult { - walked, - to_update, - to_walk, - to_remove, - errors, - paths_and_sizes, - } = keep_walking( - location_path, - to_walk_entry, - &data.indexer_rules, - update_notifier_fn(ctx), - file_paths_db_fetcher_fn!(&db), - to_remove_db_fetcher_fn!(location_id, &db), - iso_file_path_factory(location_id, location_path), - ) - .await?; - - new_metadata.paths_and_sizes = paths_and_sizes; - - new_metadata.scan_read_time = scan_start.elapsed(); - - let db_delete_time = Instant::now(); - // TODO pass these uuids to sync system - new_metadata.removed_count = - remove_non_existing_file_paths(to_remove, &db, sync).await?; - new_metadata.db_write_time = db_delete_time.elapsed(); - - let to_walk_count = to_walk.len(); - - let more_steps = walked - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - .map(|(i, chunk)| { - let chunk_steps = chunk.collect::>(); - new_metadata.total_paths += chunk_steps.len() as u64; - new_metadata.total_save_steps += 1; - - OldIndexerJobStepInput::Save(OldIndexerJobSaveStep { - chunk_idx: i, - walked: chunk_steps, - }) - }) - .chain(to_update.chunks(BATCH_SIZE).into_iter().enumerate().map( - |(i, chunk)| { - let chunk_updates = chunk.collect::>(); - new_metadata.total_updated_paths += chunk_updates.len() as u64; - new_metadata.total_update_steps += 1; - - OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep { - chunk_idx: i, - 
to_update: chunk_updates, - }) - }, - )) - .chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk)) - .collect::>(); - - OldIndexerJobData::on_scan_progress( - ctx, - vec![ - ScanProgress::ChunkCount(more_steps.len() - to_walk_count), - ScanProgress::Message(format!( - "Scanned {} more files or directories; \ - {} more directories to scan and {} more entries to update", - new_metadata.total_paths, - to_walk_count, - new_metadata.total_updated_paths - )), - ], - ); - - Ok(( - more_steps, - new_metadata, - errors - .into_iter() - .map(|e| format!("{e}")) - .collect::>() - .into(), - ) - .into()) - } - } - } - - async fn finalize( - &self, - ctx: &WorkerContext, - data: &Option, - run_metadata: &Self::RunMetadata, - ) -> JobResult { - let init = self; - let indexed_path_str = data - .as_ref() - .map(|data| Ok(data.indexed_path.to_string_lossy().to_string())) - .unwrap_or_else(|| maybe_missing(&init.location.path, "location.path").cloned())?; - - info!( - "Scan of {indexed_path_str} completed in {:?}. {} new files found, \ - indexed {} files in db, updated {} entries. db write completed in {:?}", - run_metadata.scan_read_time, - run_metadata.total_paths, - run_metadata.indexed_count, - run_metadata.total_updated_paths, - run_metadata.db_write_time, - ); - - if run_metadata.indexed_count > 0 || run_metadata.removed_count > 0 { - invalidate_query!(ctx.library, "search.paths"); - } - - if run_metadata.total_updated_paths > 0 { - // Invoking orphan remover here as we probably have some orphans objects due to updates - // ctx.library.orphan_remover.invoke().await; - } - - if run_metadata.indexed_count > 0 - || run_metadata.removed_count > 0 - || run_metadata.updated_count > 0 - { - if let Some(data) = data { - update_directories_sizes( - &run_metadata.paths_and_sizes, - init.location.id, - &data.indexed_path, - &ctx.library, - ) - .await?; - - if data.indexed_path != data.location_path { - reverse_update_directories_sizes( - &data.indexed_path, - init.location.id, - &data.location_path, - &ctx.library, - ) - .await - .map_err(IndexerError::from)?; - } - - update_location_size(init.location.id, &ctx.library) - .await - .map_err(IndexerError::from)?; - - ctx.library - .db - .location() - .update( - location::id::equals(init.location.id), - vec![location::scan_state::set(ScanState::Indexed as i32)], - ) - .exec() - .await - .map_err(IndexerError::from)?; - } - } - - // FIXME(fogodev): This is currently a workaround to don't save paths and sizes in the - // metadata after a job is completed, as it's pretty heavy. A proper fix isn't needed - // right now as I already changed it in the new indexer job. And this old one - // will be removed eventually. 
- let run_metadata = Self::RunMetadata { - db_write_time: run_metadata.db_write_time, - scan_read_time: run_metadata.scan_read_time, - total_paths: run_metadata.total_paths, - total_updated_paths: run_metadata.total_updated_paths, - total_save_steps: run_metadata.total_save_steps, - total_update_steps: run_metadata.total_update_steps, - indexed_count: run_metadata.indexed_count, - updated_count: run_metadata.updated_count, - removed_count: run_metadata.removed_count, - paths_and_sizes: HashMap::new(), - }; - - Ok(Some(json!({"init: ": init, "run_metadata": run_metadata}))) - } -} - -fn update_notifier_fn(ctx: &WorkerContext) -> impl FnMut(&Path, usize) + '_ { - move |path, total_entries| { - OldIndexerJobData::on_scan_progress( - ctx, - vec![ScanProgress::Message(format!( - "{total_entries} entries found at {}", - path.display() - ))], - ); - } -} - -async fn update_directories_sizes( - paths_and_sizes: &HashMap, - location_id: location::id::Type, - location_path: impl AsRef, - library: &Library, -) -> Result<(), IndexerError> { - let location_path = location_path.as_ref(); - - let Library { db, sync, .. } = library; - - let chunked_queries = paths_and_sizes - .keys() - .chunks(200) - .into_iter() - .map(|paths_chunk| { - paths_chunk - .into_iter() - .map(|path| { - IsolatedFilePathData::new(location_id, location_path, path, true) - .map(file_path::WhereParam::from) - }) - .collect::, _>>() - .map(|params| { - db.file_path() - .find_many(vec![or(params)]) - .select(file_path::select!({ pub_id materialized_path name })) - }) - }) - .collect::, _>>()?; - - let to_sync_and_update = db - ._batch(chunked_queries) - .await? - .into_iter() - .flatten() - .filter_map( - |file_path| match (file_path.materialized_path, file_path.name) { - (Some(materialized_path), Some(name)) => { - let mut directory_full_path = location_path.join(&materialized_path[1..]); - directory_full_path.push(name); - - if let Some(size) = paths_and_sizes.get(&directory_full_path) { - let size_bytes = size.to_be_bytes().to_vec(); - - Some(( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: file_path.pub_id.clone(), - }, - file_path::size_in_bytes_bytes::NAME, - msgpack!(size_bytes.clone()), - ), - db.file_path().update( - file_path::pub_id::equals(file_path.pub_id), - vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], - ), - )) - } else { - warn!("Found a file_path without ancestor in the database, possible corruption"); - None - } - } - _ => { - warn!( - "Found a file_path missing its materialized_path or name: ", - from_bytes_to_uuid(&file_path.pub_id) - ); - None - } - }, - ) - .unzip::<_, _, Vec<_>, Vec<_>>(); - - sync.write_ops(db, to_sync_and_update).await?; - - Ok(()) -} +// use crate::{ +// file_paths_db_fetcher_fn, invalidate_query, +// library::Library, +// location::{location_with_indexer_rules, update_location_size, ScanState}, +// old_job::{ +// CurrentStep, JobError, JobInitOutput, JobReportUpdate, JobResult, JobRunMetadata, +// JobStepOutput, StatefulJob, WorkerContext, +// }, +// to_remove_db_fetcher_fn, +// }; + +// use sd_core_file_path_helper::{ +// ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, +// IsolatedFilePathData, +// }; +// use sd_core_indexer_rules::IndexerRule; + +// use sd_prisma::{ +// prisma::{file_path, location}, +// prisma_sync, +// }; +// use sd_sync::*; +// use sd_utils::{db::maybe_missing, from_bytes_to_uuid, msgpack}; + +// use std::{ +// collections::HashMap, +// hash::{Hash, Hasher}, +// path::{Path, PathBuf}, +// 
sync::Arc, +// time::Duration, +// }; + +// use itertools::Itertools; +// use prisma_client_rust::operator::or; +// use serde::{Deserialize, Serialize}; +// use serde_json::json; +// use tokio::time::Instant; +// use tracing::{debug, info, warn}; + +// use super::{ +// execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory, +// old_walk::{keep_walking, walk, ToWalkEntry, WalkResult}, +// remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError, +// OldIndexerJobSaveStep, OldIndexerJobUpdateStep, +// }; + +// /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. +// const BATCH_SIZE: usize = 1000; + +// /// `IndexerJobInit` receives a `location::Data` object to be indexed +// /// and possibly a `sub_path` to be indexed. The `sub_path` is used when +// /// we want do index just a part of a location. +// #[derive(Serialize, Deserialize, Debug)] +// pub struct OldIndexerJobInit { +// pub location: location_with_indexer_rules::Data, +// pub sub_path: Option, +// } + +// impl Hash for OldIndexerJobInit { +// fn hash(&self, state: &mut H) { +// self.location.id.hash(state); +// if let Some(ref sub_path) = self.sub_path { +// sub_path.hash(state); +// } +// } +// } + +// /// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that +// /// is cached and casted on `PathBuf` from `local_path` column in the `location` table. It also +// /// contains some metadata for logging purposes. +// #[derive(Serialize, Deserialize, Debug)] +// pub struct OldIndexerJobData { +// location_path: PathBuf, +// indexed_path: PathBuf, +// indexer_rules: Vec, +// } + +// #[derive(Serialize, Deserialize, Default, Debug)] +// pub struct OldIndexerJobRunMetadata { +// db_write_time: Duration, +// scan_read_time: Duration, +// total_paths: u64, +// total_updated_paths: u64, +// total_save_steps: u64, +// total_update_steps: u64, +// indexed_count: u64, +// updated_count: u64, +// removed_count: u64, +// paths_and_sizes: HashMap, +// } + +// impl JobRunMetadata for OldIndexerJobRunMetadata { +// fn update(&mut self, new_data: Self) { +// self.db_write_time += new_data.db_write_time; +// self.scan_read_time += new_data.scan_read_time; +// self.total_paths += new_data.total_paths; +// self.total_updated_paths += new_data.total_updated_paths; +// self.total_save_steps += new_data.total_save_steps; +// self.total_update_steps += new_data.total_update_steps; +// self.indexed_count += new_data.indexed_count; +// self.removed_count += new_data.removed_count; + +// for (path, size) in new_data.paths_and_sizes { +// *self.paths_and_sizes.entry(path).or_default() += size; +// } +// } +// } + +// #[derive(Clone)] +// pub enum ScanProgress { +// ChunkCount(usize), +// SavedChunks(usize), +// UpdatedChunks(usize), +// Message(String), +// } + +// impl OldIndexerJobData { +// fn on_scan_progress(ctx: &WorkerContext, progress: Vec) { +// ctx.progress( +// progress +// .into_iter() +// .map(|p| match p { +// ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c), +// ScanProgress::SavedChunks(p) | ScanProgress::UpdatedChunks(p) => { +// JobReportUpdate::CompletedTaskCount(p) +// } +// ScanProgress::Message(m) => JobReportUpdate::Message(m), +// }) +// .collect(), +// ) +// } +// } + +// /// `IndexerJobStepInput` defines the action that should be executed in the current step +// #[derive(Serialize, Deserialize, Debug)] +// pub enum OldIndexerJobStepInput { +// Save(OldIndexerJobSaveStep), +// 
Walk(ToWalkEntry), +// Update(OldIndexerJobUpdateStep), +// } + +// /// A `IndexerJob` is a stateful job that walks a directory and indexes all files. +// /// First it walks the directory and generates a list of files to index, chunked into +// /// batches of [`BATCH_SIZE`]. Then for each chunk it write the file metadata to the database. +// #[async_trait::async_trait] +// impl StatefulJob for OldIndexerJobInit { +// type Data = OldIndexerJobData; +// type Step = OldIndexerJobStepInput; +// type RunMetadata = OldIndexerJobRunMetadata; + +// const NAME: &'static str = "indexer"; +// const IS_BATCHED: bool = true; + +// fn target_location(&self) -> location::id::Type { +// self.location.id +// } + +// /// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`. +// async fn init( +// &self, +// ctx: &WorkerContext, +// data: &mut Option, +// ) -> Result, JobError> { +// let init = self; +// let location_id = init.location.id; +// let location_path = maybe_missing(&init.location.path, "location.path").map(Path::new)?; + +// let db = Arc::clone(&ctx.library.db); +// let sync = &ctx.library.sync; + +// let indexer_rules = init +// .location +// .indexer_rules +// .iter() +// .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) +// .collect::, _>>() +// .map_err(IndexerError::from)?; + +// let to_walk_path = match &init.sub_path { +// Some(sub_path) if sub_path != Path::new("") => { +// let full_path = ensure_sub_path_is_in_location(location_path, sub_path) +// .await +// .map_err(IndexerError::from)?; +// ensure_sub_path_is_directory(location_path, sub_path) +// .await +// .map_err(IndexerError::from)?; + +// ensure_file_path_exists( +// sub_path, +// &IsolatedFilePathData::new(location_id, location_path, &full_path, true) +// .map_err(IndexerError::from)?, +// &db, +// IndexerError::SubPathNotFound, +// ) +// .await?; + +// full_path +// } +// _ => location_path.to_path_buf(), +// }; + +// let scan_start = Instant::now(); +// let WalkResult { +// walked, +// to_update, +// to_walk, +// to_remove, +// errors, +// paths_and_sizes, +// } = walk( +// &location_path, +// &to_walk_path, +// &indexer_rules, +// update_notifier_fn(ctx), +// file_paths_db_fetcher_fn!(&db), +// to_remove_db_fetcher_fn!(location_id, &db), +// iso_file_path_factory(location_id, location_path), +// 50_000, +// ) +// .await?; +// let scan_read_time = scan_start.elapsed(); +// let to_remove = to_remove.collect::>(); + +// debug!( +// "Walker at indexer job found {} file_paths to be removed", +// to_remove.len() +// ); + +// ctx.node +// .thumbnailer +// .remove_indexed_cas_ids( +// to_remove +// .iter() +// .filter_map(|file_path| file_path.cas_id.clone()) +// .collect::>(), +// ctx.library.id, +// ) +// .await; + +// let db_delete_start = Instant::now(); +// // TODO pass these uuids to sync system +// let removed_count = remove_non_existing_file_paths(to_remove, &db, sync).await?; +// let db_delete_time = db_delete_start.elapsed(); + +// let total_new_paths = &mut 0; +// let total_updated_paths = &mut 0; +// let to_walk_count = to_walk.len(); +// let to_save_chunks = &mut 0; +// let to_update_chunks = &mut 0; + +// let steps = walked +// .chunks(BATCH_SIZE) +// .into_iter() +// .enumerate() +// .map(|(i, chunk)| { +// let chunk_steps = chunk.collect::>(); + +// *total_new_paths += chunk_steps.len() as u64; +// *to_save_chunks += 1; + +// OldIndexerJobStepInput::Save(OldIndexerJobSaveStep { +// chunk_idx: i, +// walked: chunk_steps, +// }) +// }) +// .chain( +// to_update +// 
.chunks(BATCH_SIZE) +// .into_iter() +// .enumerate() +// .map(|(i, chunk)| { +// let chunk_updates = chunk.collect::>(); + +// *total_updated_paths += chunk_updates.len() as u64; +// *to_update_chunks += 1; + +// OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep { +// chunk_idx: i, +// to_update: chunk_updates, +// }) +// }), +// ) +// .chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk)) +// .collect::>(); + +// debug!("Walker at indexer job found {total_updated_paths} file_paths to be updated"); + +// OldIndexerJobData::on_scan_progress( +// ctx, +// vec![ +// ScanProgress::ChunkCount(*to_save_chunks + *to_update_chunks), +// ScanProgress::Message(format!( +// "Starting saving {total_new_paths} files or directories, \ +// {total_updated_paths} files or directories to update, \ +// there still {to_walk_count} directories to index", +// )), +// ], +// ); + +// *data = Some(OldIndexerJobData { +// location_path: location_path.to_path_buf(), +// indexed_path: to_walk_path, +// indexer_rules, +// }); + +// Ok(( +// OldIndexerJobRunMetadata { +// db_write_time: db_delete_time, +// scan_read_time, +// total_paths: *total_new_paths, +// total_updated_paths: *total_updated_paths, +// indexed_count: 0, +// updated_count: 0, +// removed_count, +// total_save_steps: *to_save_chunks as u64, +// total_update_steps: *to_update_chunks as u64, +// paths_and_sizes, +// }, +// steps, +// errors +// .into_iter() +// .map(|e| format!("{e}")) +// .collect::>() +// .into(), +// ) +// .into()) +// } + +// /// Process each chunk of entries in the indexer job, writing to the `file_path` table +// async fn execute_step( +// &self, +// ctx: &WorkerContext, +// CurrentStep { step, .. }: CurrentStep<'_, Self::Step>, +// data: &Self::Data, +// run_metadata: &Self::RunMetadata, +// ) -> Result, JobError> { +// let init = self; +// let mut new_metadata = Self::RunMetadata::default(); +// match step { +// OldIndexerJobStepInput::Save(step) => { +// let start_time = Instant::now(); + +// OldIndexerJobData::on_scan_progress( +// ctx, +// vec![ +// ScanProgress::SavedChunks(step.chunk_idx + 1), +// ScanProgress::Message(format!( +// "Writing chunk {} of {} to database", +// step.chunk_idx, run_metadata.total_save_steps +// )), +// ], +// ); + +// let count = execute_indexer_save_step(&init.location, step, &ctx.library).await?; + +// new_metadata.indexed_count = count as u64; +// new_metadata.db_write_time = start_time.elapsed(); + +// Ok(new_metadata.into()) +// } +// OldIndexerJobStepInput::Update(to_update) => { +// let start_time = Instant::now(); +// OldIndexerJobData::on_scan_progress( +// ctx, +// vec![ +// ScanProgress::UpdatedChunks(to_update.chunk_idx + 1), +// ScanProgress::Message(format!( +// "Updating chunk {} of {} to database", +// to_update.chunk_idx, run_metadata.total_save_steps +// )), +// ], +// ); + +// let count = execute_indexer_update_step(to_update, &ctx.library).await?; + +// new_metadata.updated_count = count as u64; +// new_metadata.db_write_time = start_time.elapsed(); + +// Ok(new_metadata.into()) +// } + +// OldIndexerJobStepInput::Walk(to_walk_entry) => { +// let location_id = init.location.id; +// let location_path = +// maybe_missing(&init.location.path, "location.path").map(Path::new)?; + +// let db = Arc::clone(&ctx.library.db); +// let sync = &ctx.library.sync; + +// let scan_start = Instant::now(); + +// let WalkResult { +// walked, +// to_update, +// to_walk, +// to_remove, +// errors, +// paths_and_sizes, +// } = keep_walking( +// location_path, +// to_walk_entry, +// 
&data.indexer_rules, +// update_notifier_fn(ctx), +// file_paths_db_fetcher_fn!(&db), +// to_remove_db_fetcher_fn!(location_id, &db), +// iso_file_path_factory(location_id, location_path), +// ) +// .await?; + +// new_metadata.paths_and_sizes = paths_and_sizes; + +// new_metadata.scan_read_time = scan_start.elapsed(); + +// let db_delete_time = Instant::now(); +// // TODO pass these uuids to sync system +// new_metadata.removed_count = +// remove_non_existing_file_paths(to_remove, &db, sync).await?; +// new_metadata.db_write_time = db_delete_time.elapsed(); + +// let to_walk_count = to_walk.len(); + +// let more_steps = walked +// .chunks(BATCH_SIZE) +// .into_iter() +// .enumerate() +// .map(|(i, chunk)| { +// let chunk_steps = chunk.collect::>(); +// new_metadata.total_paths += chunk_steps.len() as u64; +// new_metadata.total_save_steps += 1; + +// OldIndexerJobStepInput::Save(OldIndexerJobSaveStep { +// chunk_idx: i, +// walked: chunk_steps, +// }) +// }) +// .chain(to_update.chunks(BATCH_SIZE).into_iter().enumerate().map( +// |(i, chunk)| { +// let chunk_updates = chunk.collect::>(); +// new_metadata.total_updated_paths += chunk_updates.len() as u64; +// new_metadata.total_update_steps += 1; + +// OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep { +// chunk_idx: i, +// to_update: chunk_updates, +// }) +// }, +// )) +// .chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk)) +// .collect::>(); + +// OldIndexerJobData::on_scan_progress( +// ctx, +// vec![ +// ScanProgress::ChunkCount(more_steps.len() - to_walk_count), +// ScanProgress::Message(format!( +// "Scanned {} more files or directories; \ +// {} more directories to scan and {} more entries to update", +// new_metadata.total_paths, +// to_walk_count, +// new_metadata.total_updated_paths +// )), +// ], +// ); + +// Ok(( +// more_steps, +// new_metadata, +// errors +// .into_iter() +// .map(|e| format!("{e}")) +// .collect::>() +// .into(), +// ) +// .into()) +// } +// } +// } + +// async fn finalize( +// &self, +// ctx: &WorkerContext, +// data: &Option, +// run_metadata: &Self::RunMetadata, +// ) -> JobResult { +// let init = self; +// let indexed_path_str = data +// .as_ref() +// .map(|data| Ok(data.indexed_path.to_string_lossy().to_string())) +// .unwrap_or_else(|| maybe_missing(&init.location.path, "location.path").cloned())?; + +// info!( +// "Scan of {indexed_path_str} completed in {:?}. {} new files found, \ +// indexed {} files in db, updated {} entries. 
db write completed in {:?}", +// run_metadata.scan_read_time, +// run_metadata.total_paths, +// run_metadata.indexed_count, +// run_metadata.total_updated_paths, +// run_metadata.db_write_time, +// ); + +// if run_metadata.indexed_count > 0 || run_metadata.removed_count > 0 { +// invalidate_query!(ctx.library, "search.paths"); +// } + +// if run_metadata.total_updated_paths > 0 { +// // Invoking orphan remover here as we probably have some orphans objects due to updates +// // ctx.library.orphan_remover.invoke().await; +// } + +// if run_metadata.indexed_count > 0 +// || run_metadata.removed_count > 0 +// || run_metadata.updated_count > 0 +// { +// if let Some(data) = data { +// update_directories_sizes( +// &run_metadata.paths_and_sizes, +// init.location.id, +// &data.indexed_path, +// &ctx.library, +// ) +// .await?; + +// if data.indexed_path != data.location_path { +// reverse_update_directories_sizes( +// &data.indexed_path, +// init.location.id, +// &data.location_path, +// &ctx.library, +// ) +// .await +// .map_err(IndexerError::from)?; +// } + +// update_location_size(init.location.id, &ctx.library) +// .await +// .map_err(IndexerError::from)?; + +// ctx.library +// .db +// .location() +// .update( +// location::id::equals(init.location.id), +// vec![location::scan_state::set(ScanState::Indexed as i32)], +// ) +// .exec() +// .await +// .map_err(IndexerError::from)?; +// } +// } + +// // FIXME(fogodev): This is currently a workaround to don't save paths and sizes in the +// // metadata after a job is completed, as it's pretty heavy. A proper fix isn't needed +// // right now as I already changed it in the new indexer job. And this old one +// // will be removed eventually. +// let run_metadata = Self::RunMetadata { +// db_write_time: run_metadata.db_write_time, +// scan_read_time: run_metadata.scan_read_time, +// total_paths: run_metadata.total_paths, +// total_updated_paths: run_metadata.total_updated_paths, +// total_save_steps: run_metadata.total_save_steps, +// total_update_steps: run_metadata.total_update_steps, +// indexed_count: run_metadata.indexed_count, +// updated_count: run_metadata.updated_count, +// removed_count: run_metadata.removed_count, +// paths_and_sizes: HashMap::new(), +// }; + +// Ok(Some(json!({"init: ": init, "run_metadata": run_metadata}))) +// } +// } + +// fn update_notifier_fn(ctx: &WorkerContext) -> impl FnMut(&Path, usize) + '_ { +// move |path, total_entries| { +// OldIndexerJobData::on_scan_progress( +// ctx, +// vec![ScanProgress::Message(format!( +// "{total_entries} entries found at {}", +// path.display() +// ))], +// ); +// } +// } + +// async fn update_directories_sizes( +// paths_and_sizes: &HashMap, +// location_id: location::id::Type, +// location_path: impl AsRef, +// library: &Library, +// ) -> Result<(), IndexerError> { +// let location_path = location_path.as_ref(); + +// let Library { db, sync, .. } = library; + +// let chunked_queries = paths_and_sizes +// .keys() +// .chunks(200) +// .into_iter() +// .map(|paths_chunk| { +// paths_chunk +// .into_iter() +// .map(|path| { +// IsolatedFilePathData::new(location_id, location_path, path, true) +// .map(file_path::WhereParam::from) +// }) +// .collect::, _>>() +// .map(|params| { +// db.file_path() +// .find_many(vec![or(params)]) +// .select(file_path::select!({ pub_id materialized_path name })) +// }) +// }) +// .collect::, _>>()?; + +// let to_sync_and_update = db +// ._batch(chunked_queries) +// .await? 
+// .into_iter() +// .flatten() +// .filter_map( +// |file_path| match (file_path.materialized_path, file_path.name) { +// (Some(materialized_path), Some(name)) => { +// let mut directory_full_path = location_path.join(&materialized_path[1..]); +// directory_full_path.push(name); + +// if let Some(size) = paths_and_sizes.get(&directory_full_path) { +// let size_bytes = size.to_be_bytes().to_vec(); + +// Some(( +// sync.shared_update( +// prisma_sync::file_path::SyncId { +// pub_id: file_path.pub_id.clone(), +// }, +// file_path::size_in_bytes_bytes::NAME, +// msgpack!(size_bytes.clone()), +// ), +// db.file_path().update( +// file_path::pub_id::equals(file_path.pub_id), +// vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], +// ), +// )) +// } else { +// warn!("Found a file_path without ancestor in the database, possible corruption"); +// None +// } +// } +// _ => { +// warn!( +// "Found a file_path missing its materialized_path or name: ", +// from_bytes_to_uuid(&file_path.pub_id) +// ); +// None +// } +// }, +// ) +// .unzip::<_, _, Vec<_>, Vec<_>>(); + +// sync.write_ops(db, to_sync_and_update).await?; + +// Ok(()) +// } diff --git a/core/src/location/indexer/old_shallow.rs b/core/src/location/indexer/old_shallow.rs index d7857bd10e9c..4860f13e9afd 100644 --- a/core/src/location/indexer/old_shallow.rs +++ b/core/src/location/indexer/old_shallow.rs @@ -1,197 +1,197 @@ -use crate::{ - file_paths_db_fetcher_fn, invalidate_query, - library::Library, - location::{ - indexer::{ - execute_indexer_update_step, reverse_update_directories_sizes, OldIndexerJobUpdateStep, - }, - scan_location_sub_path, update_location_size, - }, - old_job::JobError, - to_remove_db_fetcher_fn, Node, -}; - -use sd_core_file_path_helper::{ - check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - IsolatedFilePathData, -}; -use sd_core_indexer_rules::IndexerRule; - -use sd_utils::db::maybe_missing; - -use std::{ - collections::HashSet, - path::{Path, PathBuf}, - sync::Arc, -}; - -use futures::future::join_all; -use itertools::Itertools; -use tracing::{debug, error}; - -use super::{ - execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules, - old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep, -}; - -/// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. 
-const BATCH_SIZE: usize = 1000; - -pub async fn old_shallow( - location: &location_with_indexer_rules::Data, - sub_path: &PathBuf, - node: &Arc, - library: &Arc, -) -> Result<(), JobError> { - let location_id = location.id; - let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?; - - let db = library.db.clone(); - let sync = &library.sync; - - let indexer_rules = location - .indexer_rules - .iter() - .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) - .collect::, _>>() - .map_err(IndexerError::from)?; - - let (add_root, to_walk_path) = if sub_path != Path::new("") && sub_path != Path::new("/") { - let full_path = ensure_sub_path_is_in_location(&location_path, &sub_path) - .await - .map_err(IndexerError::from)?; - ensure_sub_path_is_directory(&location_path, &sub_path) - .await - .map_err(IndexerError::from)?; - - ( - !check_file_path_exists::( - &IsolatedFilePathData::new(location_id, location_path, &full_path, true) - .map_err(IndexerError::from)?, - &db, - ) - .await?, - full_path, - ) - } else { - (false, location_path.to_path_buf()) - }; - - let (walked, to_update, to_remove, errors, _s) = { - walk_single_dir( - location_path, - &to_walk_path, - &indexer_rules, - file_paths_db_fetcher_fn!(&db), - to_remove_db_fetcher_fn!(location_id, &db), - iso_file_path_factory(location_id, location_path), - add_root, - ) - .await? - }; - - let to_remove_count = to_remove.len(); - - node.thumbnailer - .remove_indexed_cas_ids( - to_remove - .iter() - .filter_map(|file_path| file_path.cas_id.clone()) - .collect::>(), - library.id, - ) - .await; - - errors.into_iter().for_each(|e| error!("{e}")); - - remove_non_existing_file_paths(to_remove, &db, sync).await?; - - let mut new_directories_to_scan = HashSet::new(); - - let mut to_create_count = 0; - - let save_steps = walked - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - .map(|(i, chunk)| { - let walked = chunk.collect::>(); - to_create_count += walked.len(); - - walked - .iter() - .filter_map(|walked_entry| { - walked_entry.iso_file_path.materialized_path_for_children() - }) - .for_each(|new_dir| { - new_directories_to_scan.insert(new_dir); - }); - - OldIndexerJobSaveStep { - chunk_idx: i, - walked, - } - }) - .collect::>(); - - for step in save_steps { - execute_indexer_save_step(location, &step, library).await?; - } - - for scan in join_all( - new_directories_to_scan - .into_iter() - .map(|sub_path| scan_location_sub_path(node, library, location.clone(), sub_path)), - ) - .await - { - if let Err(e) = scan { - error!("{e}"); - } - } - - let mut to_update_count = 0; - - let update_steps = to_update - .chunks(BATCH_SIZE) - .into_iter() - .enumerate() - .map(|(i, chunk)| { - let to_update = chunk.collect::>(); - to_update_count += to_update.len(); - - OldIndexerJobUpdateStep { - chunk_idx: i, - to_update, - } - }) - .collect::>(); - - for step in update_steps { - execute_indexer_update_step(&step, library).await?; - } - - debug!( - "Walker at shallow indexer found: \ - To create: {to_create_count}; To update: {to_update_count}; To remove: {to_remove_count};" - ); - - if to_create_count > 0 || to_update_count > 0 || to_remove_count > 0 { - if to_walk_path != location_path { - reverse_update_directories_sizes(to_walk_path, location_id, location_path, library) - .await - .map_err(IndexerError::from)?; - } - - update_location_size(location.id, library) - .await - .map_err(IndexerError::from)?; - - invalidate_query!(library, "search.paths"); - invalidate_query!(library, "search.objects"); - } - - // 
library.orphan_remover.invoke().await; - - Ok(()) -} +// use crate::{ +// file_paths_db_fetcher_fn, invalidate_query, +// library::Library, +// location::{ +// indexer::{ +// execute_indexer_update_step, reverse_update_directories_sizes, OldIndexerJobUpdateStep, +// }, +// scan_location_sub_path, update_location_size, +// }, +// old_job::JobError, +// to_remove_db_fetcher_fn, Node, +// }; + +// use sd_core_file_path_helper::{ +// check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, +// IsolatedFilePathData, +// }; +// use sd_core_indexer_rules::IndexerRule; + +// use sd_utils::db::maybe_missing; + +// use std::{ +// collections::HashSet, +// path::{Path, PathBuf}, +// sync::Arc, +// }; + +// use futures::future::join_all; +// use itertools::Itertools; +// use tracing::{debug, error}; + +// use super::{ +// execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules, +// old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep, +// }; + +// /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. +// const BATCH_SIZE: usize = 1000; + +// pub async fn old_shallow( +// location: &location_with_indexer_rules::Data, +// sub_path: &PathBuf, +// node: &Arc, +// library: &Arc, +// ) -> Result<(), JobError> { +// let location_id = location.id; +// let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?; + +// let db = library.db.clone(); +// let sync = &library.sync; + +// let indexer_rules = location +// .indexer_rules +// .iter() +// .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) +// .collect::, _>>() +// .map_err(IndexerError::from)?; + +// let (add_root, to_walk_path) = if sub_path != Path::new("") && sub_path != Path::new("/") { +// let full_path = ensure_sub_path_is_in_location(&location_path, &sub_path) +// .await +// .map_err(IndexerError::from)?; +// ensure_sub_path_is_directory(&location_path, &sub_path) +// .await +// .map_err(IndexerError::from)?; + +// ( +// !check_file_path_exists::( +// &IsolatedFilePathData::new(location_id, location_path, &full_path, true) +// .map_err(IndexerError::from)?, +// &db, +// ) +// .await?, +// full_path, +// ) +// } else { +// (false, location_path.to_path_buf()) +// }; + + // let (walked, to_update, to_remove, errors, _s) = { + // walk_single_dir( + // location_path, + // &to_walk_path, + // &indexer_rules, + // file_paths_db_fetcher_fn!(&db), + // to_remove_db_fetcher_fn!(location_id, &db), + // iso_file_path_factory(location_id, location_path), + // add_root, + // ) + // .await? 
+ // }; + +// let to_remove_count = to_remove.len(); + +// node.thumbnailer +// .remove_indexed_cas_ids( +// to_remove +// .iter() +// .filter_map(|file_path| file_path.cas_id.clone()) +// .collect::>(), +// library.id, +// ) +// .await; + +// errors.into_iter().for_each(|e| error!("{e}")); + +// remove_non_existing_file_paths(to_remove, &db, sync).await?; + +// let mut new_directories_to_scan = HashSet::new(); + +// let mut to_create_count = 0; + +// let save_steps = walked +// .chunks(BATCH_SIZE) +// .into_iter() +// .enumerate() +// .map(|(i, chunk)| { +// let walked = chunk.collect::>(); +// to_create_count += walked.len(); + +// walked +// .iter() +// .filter_map(|walked_entry| { +// walked_entry.iso_file_path.materialized_path_for_children() +// }) +// .for_each(|new_dir| { +// new_directories_to_scan.insert(new_dir); +// }); + +// OldIndexerJobSaveStep { +// chunk_idx: i, +// walked, +// } +// }) +// .collect::>(); + +// for step in save_steps { +// execute_indexer_save_step(location, &step, library).await?; +// } + +// for scan in join_all( +// new_directories_to_scan +// .into_iter() +// .map(|sub_path| scan_location_sub_path(node, library, location.clone(), sub_path)), +// ) +// .await +// { +// if let Err(e) = scan { +// error!("{e}"); +// } +// } + +// let mut to_update_count = 0; + +// let update_steps = to_update +// .chunks(BATCH_SIZE) +// .into_iter() +// .enumerate() +// .map(|(i, chunk)| { +// let to_update = chunk.collect::>(); +// to_update_count += to_update.len(); + +// OldIndexerJobUpdateStep { +// chunk_idx: i, +// to_update, +// } +// }) +// .collect::>(); + +// for step in update_steps { +// execute_indexer_update_step(&step, library).await?; +// } + +// debug!( +// "Walker at shallow indexer found: \ +// To create: {to_create_count}; To update: {to_update_count}; To remove: {to_remove_count};" +// ); + +// if to_create_count > 0 || to_update_count > 0 || to_remove_count > 0 { +// if to_walk_path != location_path { +// reverse_update_directories_sizes(to_walk_path, location_id, location_path, library) +// .await +// .map_err(IndexerError::from)?; +// } + +// update_location_size(location.id, library) +// .await +// .map_err(IndexerError::from)?; + +// invalidate_query!(library, "search.paths"); +// invalidate_query!(library, "search.objects"); +// } + +// // library.orphan_remover.invoke().await; + +// Ok(()) +// } diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 98444008c42a..357541bdb058 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -6,19 +6,7 @@ use crate::{ indexer::reverse_update_directories_sizes, location_with_indexer_rules, manager::LocationManagerError, scan_location_sub_path, update_location_size, }, - object::{ - media::{ - exif_data_image_to_query_params, - exif_metadata_extractor::{can_extract_exif_data_for_image, extract_exif_data}, - ffmpeg_metadata_extractor::{ - can_extract_ffmpeg_data_for_audio, can_extract_ffmpeg_data_for_video, - extract_ffmpeg_data, save_ffmpeg_data, - }, - old_thumbnail::get_indexed_thumbnail_path, - }, - old_file_identifier::FileMetadata, - validation::hash::file_checksum, - }, + object::{media::get_indexed_thumbnail_path, validation::hash::file_checksum}, Node, }; @@ -28,6 +16,13 @@ use sd_core_file_path_helper::{ loose_find_existing_file_path_params, path_is_hidden, FilePathError, FilePathMetadata, IsolatedFilePathData, MetadataExt, }; +use sd_core_heavy_lifting::{ + file_identifier::FileMetadata, + 
media_processor::{ + exif_media_data, ffmpeg_media_data, generate_single_thumbnail, get_thumbnails_directory, + ThumbnailKind, + }, +}; use sd_core_prisma_helpers::file_path_with_object; use sd_file_ext::{ @@ -35,14 +30,14 @@ use sd_file_ext::{ kind::ObjectKind, }; use sd_prisma::{ - prisma::{exif_data, file_path, location, object}, + prisma::{file_path, location, object}, prisma_sync, }; use sd_sync::OperationFactory; use sd_utils::{ db::{inode_from_db, inode_to_db, maybe_missing}, error::FileIOError, - msgpack, uuid_to_bytes, + from_bytes_to_uuid, msgpack, uuid_to_bytes, }; #[cfg(target_family = "unix")] @@ -330,14 +325,19 @@ async fn inner_create_file( spawn({ let extension = extension.clone(); let path = path.to_path_buf(); - let node = node.clone(); + let thumbnails_directory = + get_thumbnails_directory(node.config.data_directory()); let library_id = *library_id; async move { - if let Err(e) = node - .thumbnailer - .generate_single_indexed_thumbnail(&extension, cas_id, path, library_id) - .await + if let Err(e) = generate_single_thumbnail( + &thumbnails_directory, + extension, + cas_id, + path, + ThumbnailKind::Indexed(library_id), + ) + .await { error!("Failed to generate thumbnail in the watcher: {e:#?}"); } @@ -349,34 +349,15 @@ async fn inner_create_file( match kind { ObjectKind::Image => { if let Ok(image_extension) = ImageExtension::from_str(&extension) { - if can_extract_exif_data_for_image(&image_extension) { - if let Ok(Some(exif_data)) = extract_exif_data(path) + if exif_media_data::can_extract(image_extension) { + if let Ok(Some(exif_data)) = exif_media_data::extract(path) .await .map_err(|e| error!("Failed to extract media data: {e:#?}")) { - let (sync_params, db_params) = - exif_data_image_to_query_params(exif_data); - - sync.write_ops( + exif_media_data::save( + [(exif_data, object_id, from_bytes_to_uuid(&object_pub_id))], db, - ( - sync.shared_create( - prisma_sync::exif_data::SyncId { - object: prisma_sync::object::SyncId { - pub_id: object_pub_id.clone(), - }, - }, - sync_params, - ), - db.exif_data().upsert( - exif_data::object_id::equals(object_id), - exif_data::create( - object::id::equals(object_id), - db_params.clone(), - ), - db_params, - ), - ), + sync, ) .await?; } @@ -386,12 +367,12 @@ async fn inner_create_file( ObjectKind::Audio => { if let Ok(audio_extension) = AudioExtension::from_str(&extension) { - if can_extract_ffmpeg_data_for_audio(&audio_extension) { - if let Ok(ffmpeg_data) = extract_ffmpeg_data(path) + if ffmpeg_media_data::can_extract_for_audio(audio_extension) { + if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(path) .await .map_err(|e| error!("Failed to extract media data: {e:#?}")) { - save_ffmpeg_data([(ffmpeg_data, object_id)], db).await?; + ffmpeg_media_data::save([(ffmpeg_data, object_id)], db).await?; } } } @@ -399,12 +380,12 @@ async fn inner_create_file( ObjectKind::Video => { if let Ok(video_extension) = VideoExtension::from_str(&extension) { - if can_extract_ffmpeg_data_for_video(&video_extension) { - if let Ok(ffmpeg_data) = extract_ffmpeg_data(path) + if ffmpeg_media_data::can_extract_for_video(video_extension) { + if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(path) .await .map_err(|e| error!("Failed to extract media data: {e:#?}")) { - save_ffmpeg_data([(ffmpeg_data, object_id)], db).await?; + ffmpeg_media_data::save([(ffmpeg_data, object_id)], db).await?; } } } @@ -694,13 +675,18 @@ async fn inner_update_file( let library_id = library.id; let old_cas_id = old_cas_id.clone(); spawn(async move { + let thumbnails_directory 
= + get_thumbnails_directory(node.config.data_directory()); + let was_overwritten = old_cas_id == cas_id; - if let Err(e) = node - .thumbnailer - .generate_single_indexed_thumbnail( - &ext, cas_id, path, library_id, - ) - .await + if let Err(e) = generate_single_thumbnail( + &thumbnails_directory, + ext.clone(), + cas_id, + path, + ThumbnailKind::Indexed(library_id), + ) + .await { error!("Failed to generate thumbnail in the watcher: {e:#?}"); } @@ -728,34 +714,19 @@ async fn inner_update_file( match kind { ObjectKind::Image => { if let Ok(image_extension) = ImageExtension::from_str(extension) { - if can_extract_exif_data_for_image(&image_extension) { - if let Ok(Some(exif_data)) = extract_exif_data(full_path) + if exif_media_data::can_extract(image_extension) { + if let Ok(Some(exif_data)) = exif_media_data::extract(full_path) .await .map_err(|e| error!("Failed to extract media data: {e:#?}")) { - let (sync_params, db_params) = - exif_data_image_to_query_params(exif_data); - - sync.write_ops( + exif_media_data::save( + [( + exif_data, + object.id, + from_bytes_to_uuid(&object.pub_id), + )], db, - ( - sync.shared_create( - prisma_sync::exif_data::SyncId { - object: prisma_sync::object::SyncId { - pub_id: object.pub_id.clone(), - }, - }, - sync_params, - ), - db.exif_data().upsert( - exif_data::object_id::equals(object.id), - exif_data::create( - object::id::equals(object.id), - db_params.clone(), - ), - db_params, - ), - ), + sync, ) .await?; } @@ -765,12 +736,12 @@ async fn inner_update_file( ObjectKind::Audio => { if let Ok(audio_extension) = AudioExtension::from_str(extension) { - if can_extract_ffmpeg_data_for_audio(&audio_extension) { - if let Ok(ffmpeg_data) = extract_ffmpeg_data(full_path) + if ffmpeg_media_data::can_extract_for_audio(audio_extension) { + if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(full_path) .await .map_err(|e| error!("Failed to extract media data: {e:#?}")) { - save_ffmpeg_data([(ffmpeg_data, object.id)], db).await?; + ffmpeg_media_data::save([(ffmpeg_data, object.id)], db).await?; } } } @@ -778,12 +749,12 @@ async fn inner_update_file( ObjectKind::Video => { if let Ok(video_extension) = VideoExtension::from_str(extension) { - if can_extract_ffmpeg_data_for_video(&video_extension) { - if let Ok(ffmpeg_data) = extract_ffmpeg_data(full_path) + if ffmpeg_media_data::can_extract_for_video(video_extension) { + if let Ok(ffmpeg_data) = ffmpeg_media_data::extract(full_path) .await .map_err(|e| error!("Failed to extract media data: {e:#?}")) { - save_ffmpeg_data([(ffmpeg_data, object.id)], db).await?; + ffmpeg_media_data::save([(ffmpeg_data, object.id)], db).await?; } } } diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index cb27fad560be..be05ec7b6987 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -1,10 +1,6 @@ use crate::{ invalidate_query, library::Library, - object::{ - media::{old_media_processor, OldMediaProcessorJobInit}, - old_file_identifier::{self, old_file_identifier_job::OldFileIdentifierJobInit}, - }, old_job::{JobBuilder, JobError, JobManagerError}, Node, }; @@ -49,7 +45,7 @@ pub mod metadata; pub mod non_indexed; pub use error::LocationError; -use indexer::OldIndexerJobInit; +// use indexer::OldIndexerJobInit; pub use manager::{LocationManagerError, Locations}; use metadata::SpacedriveLocationMetadataFile; diff --git a/core/src/location/non_indexed.rs b/core/src/location/non_indexed.rs index 9050d8f0c775..88bbb91f1fe8 100644 --- a/core/src/location/non_indexed.rs +++ b/core/src/location/non_indexed.rs @@ 
-1,14 +1,18 @@ use crate::{ api::locations::ExplorerItem, + context::NodeContext, library::Library, object::{ cas::generate_cas_id, - media::old_thumbnail::{get_ephemeral_thumb_key, BatchToProcess, GenerateThumbnailArgs}, + // media::old_thumbnail::{get_ephemeral_thumb_key, BatchToProcess, GenerateThumbnailArgs}, }, Node, }; use sd_core_file_path_helper::{path_is_hidden, MetadataExt}; +use sd_core_heavy_lifting::media_processor::{ + self, get_thumbnails_directory, GenerateThumbnailArgs, NewThumbnailsReporter, ThumbKey, +}; use sd_core_indexer_rules::{ seed::{NO_HIDDEN, NO_SYSTEM_FILES}, IndexerRule, RuleKind, @@ -28,7 +32,7 @@ use std::{ use chrono::{DateTime, Utc}; use futures::Stream; -use itertools::Either; +use itertools::{Either, Itertools}; use rspc::ErrorCode; use serde::Serialize; use specta::Type; @@ -223,7 +227,7 @@ pub async fn walk( } ( - Some(get_ephemeral_thumb_key(&cas_id)), + Some(ThumbKey::new_ephemeral(&cas_id)), node.ephemeral_thumbnail_exists(&cas_id) .await .map_err(NonIndexedLocationError::from)?, @@ -256,12 +260,29 @@ pub async fn walk( thumbnails_to_generate.extend(document_thumbnails_to_generate); - node.thumbnailer - .new_ephemeral_thumbnails_batch(BatchToProcess::new( - thumbnails_to_generate, - false, - false, - )) + let thumbnails_directory = Arc::new(get_thumbnails_directory(node.config.data_directory())); + let reporter = NewThumbnailsReporter { + ctx: NodeContext { + node: Arc::clone(&node), + library: Arc::clone(&library), + }, + }; + + node.task_system + .dispatch_many( + thumbnails_to_generate + .into_iter() + .chunks(10) + .into_iter() + .map(|chunk| { + media_processor::Thumbnailer::new_ephemeral( + Arc::clone(&thumbnails_directory), + chunk.collect(), + reporter.clone(), + ) + }) + .collect::>(), + ) .await; let mut locations = library diff --git a/core/src/node/config.rs b/core/src/node/config.rs index 25e225af724a..eb3bd102bf2c 100644 --- a/core/src/node/config.rs +++ b/core/src/node/config.rs @@ -1,6 +1,6 @@ use crate::{ api::{notifications::Notification, BackendFeature}, - object::media::old_thumbnail::preferences::ThumbnailerPreferences, + /*object::media::old_thumbnail::preferences::ThumbnailerPreferences,*/ util::version_manager::{Kind, ManagedVersion, VersionManager, VersionManagerError}, }; @@ -154,7 +154,7 @@ mod identity_serde { #[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq, Type)] pub struct NodePreferences { - pub thumbnailer: ThumbnailerPreferences, + // pub thumbnailer: ThumbnailerPreferences, } #[derive( diff --git a/core/src/object/media/mod.rs b/core/src/object/media/mod.rs index 271abe87301d..b62317d694ab 100644 --- a/core/src/object/media/mod.rs +++ b/core/src/object/media/mod.rs @@ -1,278 +1,294 @@ -use sd_core_prisma_helpers::object_with_media_data; -use sd_media_metadata::{ - ffmpeg::{ - audio_props::AudioProps, - chapter::Chapter, - codec::{Codec, Props}, - program::Program, - stream::Stream, - video_props::VideoProps, - }, - ExifMetadata, FFmpegMetadata, -}; -use sd_prisma::prisma::{ - exif_data::*, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_video_props, -}; +// use sd_core_prisma_helpers::object_with_media_data; +// use sd_media_metadata::{ +// ffmpeg::{ +// audio_props::AudioProps, +// chapter::Chapter, +// codec::{Codec, Props}, +// program::Program, +// stream::Stream, +// video_props::VideoProps, +// }, +// ExifMetadata, FFmpegMetadata, +// }; +// use sd_prisma::prisma::{ +// exif_data::*, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_video_props, +// }; -pub mod 
exif_metadata_extractor; -pub mod ffmpeg_metadata_extractor; -pub mod old_media_processor; -pub mod old_thumbnail; +// pub mod exif_metadata_extractor; +// pub mod ffmpeg_metadata_extractor; +// pub mod old_media_processor; +// pub mod old_thumbnail; -pub use old_media_processor::OldMediaProcessorJobInit; -use sd_utils::db::ffmpeg_data_field_from_db; +use std::path::PathBuf; -pub fn exif_data_image_to_query(mdi: ExifMetadata, object_id: object_id::Type) -> CreateUnchecked { - CreateUnchecked { - object_id, - _params: vec![ - camera_data::set(serde_json::to_vec(&mdi.camera_data).ok()), - media_date::set(serde_json::to_vec(&mdi.date_taken).ok()), - resolution::set(serde_json::to_vec(&mdi.resolution).ok()), - media_location::set(serde_json::to_vec(&mdi.location).ok()), - artist::set(mdi.artist), - description::set(mdi.description), - copyright::set(mdi.copyright), - exif_version::set(mdi.exif_version), - epoch_time::set(mdi.date_taken.map(|x| x.unix_timestamp())), - ], - } -} +use sd_core_heavy_lifting::media_processor::{ThumbKey, ThumbnailKind}; -pub fn exif_data_image_to_query_params( - mdi: ExifMetadata, -) -> (Vec<(&'static str, rmpv::Value)>, Vec) { - use sd_sync::option_sync_db_entry; - use sd_utils::chain_optional_iter; +use crate::{library::LibraryId, Node}; - chain_optional_iter( - [], - [ - option_sync_db_entry!(serde_json::to_vec(&mdi.camera_data).ok(), camera_data), - option_sync_db_entry!(serde_json::to_vec(&mdi.date_taken).ok(), media_date), - option_sync_db_entry!(serde_json::to_vec(&mdi.location).ok(), media_location), - option_sync_db_entry!(mdi.artist, artist), - option_sync_db_entry!(mdi.description, description), - option_sync_db_entry!(mdi.copyright, copyright), - option_sync_db_entry!(mdi.exif_version, exif_version), - ], - ) - .into_iter() - .unzip() +/// This does not check if a thumbnail exists, it just returns the path that it would exist at +pub fn get_indexed_thumbnail_path(node: &Node, cas_id: &str, library_id: LibraryId) -> PathBuf { + ThumbnailKind::Indexed(library_id).compute_path(node.config.data_directory(), cas_id) } -pub fn exif_media_data_from_prisma_data(data: sd_prisma::prisma::exif_data::Data) -> ExifMetadata { - ExifMetadata { - camera_data: from_slice_option_to_option(data.camera_data).unwrap_or_default(), - date_taken: from_slice_option_to_option(data.media_date).unwrap_or_default(), - resolution: from_slice_option_to_option(data.resolution).unwrap_or_default(), - location: from_slice_option_to_option(data.media_location), - artist: data.artist, - description: data.description, - copyright: data.copyright, - exif_version: data.exif_version, - } +/// This does not check if a thumbnail exists, it just returns the path that it would exist at +pub fn get_ephemeral_thumbnail_path(node: &Node, cas_id: &str) -> PathBuf { + ThumbnailKind::Ephemeral.compute_path(node.config.data_directory(), cas_id) } -pub fn ffmpeg_data_from_prisma_data( - object_with_media_data::ffmpeg_data::Data { - formats, - duration, - start_time, - bit_rate, - metadata, - chapters, - programs, - .. 
- }: object_with_media_data::ffmpeg_data::Data, -) -> FFmpegMetadata { - FFmpegMetadata { - formats: formats.split(',').map(String::from).collect::>(), - duration: duration.map(|duration| { - let duration = ffmpeg_data_field_from_db(&duration); - ((duration >> 32) as i32, duration as u32) - }), - start_time: start_time.map(|start_time| { - let start_time = ffmpeg_data_field_from_db(&start_time); - ((start_time >> 32) as i32, start_time as u32) - }), - bit_rate: { - let bit_rate = ffmpeg_data_field_from_db(&bit_rate); - ((bit_rate >> 32) as i32, bit_rate as u32) - }, - chapters: chapters - .into_iter() - .map( - |ffmpeg_media_chapter::Data { - chapter_id, - start, - end, - time_base_den, - time_base_num, - metadata, - .. - }| Chapter { - id: chapter_id, - start: { - let start = ffmpeg_data_field_from_db(&start); - ((start >> 32) as i32, start as u32) - }, - end: { - let end = ffmpeg_data_field_from_db(&end); - ((end >> 32) as i32, end as u32) - }, - time_base_den, - time_base_num, - metadata: from_slice_option_to_option(metadata).unwrap_or_default(), - }, - ) - .collect(), - programs: programs - .into_iter() - .map( - |object_with_media_data::ffmpeg_data::programs::Data { - program_id, - name, - metadata, - streams, - .. - }| Program { - id: program_id, - name, - streams: streams - .into_iter() - .map( - |object_with_media_data::ffmpeg_data::programs::streams::Data { - stream_id, - name, - aspect_ratio_num, - aspect_ratio_den, - frames_per_second_num, - frames_per_second_den, - time_base_real_den, - time_base_real_num, - dispositions, - metadata, - codec, - .. - }| { - Stream { - id: stream_id, - name, - codec: codec.map( - |object_with_media_data::ffmpeg_data::programs::streams::codec::Data{ - kind, - sub_kind, - tag, - name, - profile, - bit_rate, - audio_props, - video_props, - .. - }| Codec { - kind, - sub_kind, - tag, - name, - profile, - bit_rate, - props: match (audio_props, video_props) { - ( - Some(ffmpeg_media_audio_props::Data { - delay, - padding, - sample_rate, - sample_format, - bit_per_sample, - channel_layout, - .. - }), - None, - ) => Some(Props::Audio(AudioProps { - delay, - padding, - sample_rate, - sample_format, - bit_per_sample, - channel_layout, - })), - ( - None, - Some(ffmpeg_media_video_props::Data { - pixel_format, - color_range, - bits_per_channel, - color_space, - color_primaries, - color_transfer, - field_order, - chroma_location, - width, - height, - aspect_ratio_num, - aspect_ratio_den, - properties, - .. 
- }), - ) => Some(Props::Video(VideoProps { - pixel_format, - color_range, - bits_per_channel, - color_space, - color_primaries, - color_transfer, - field_order, - chroma_location, - width, - height, - aspect_ratio_num, - aspect_ratio_den, - properties: properties - .map(|dispositions| { - dispositions - .split(',') - .map(String::from) - .collect::>() - }) - .unwrap_or_default(), - })), - _ => None, - }, - } - ), - aspect_ratio_num, - aspect_ratio_den, - frames_per_second_num, - frames_per_second_den, - time_base_real_den, - time_base_real_num, - dispositions: dispositions - .map(|dispositions| { - dispositions - .split(',') - .map(String::from) - .collect::>() - }) - .unwrap_or_default(), - metadata: from_slice_option_to_option(metadata).unwrap_or_default(), - } - }, - ) - .collect(), - metadata: from_slice_option_to_option(metadata).unwrap_or_default(), - }, - ) - .collect(), - metadata: from_slice_option_to_option(metadata).unwrap_or_default(), - } -} +// pub use old_media_processor::OldMediaProcessorJobInit; +// use sd_utils::db::ffmpeg_data_field_from_db; -#[must_use] -fn from_slice_option_to_option( - value: Option>, -) -> Option { - value - .map(|x| serde_json::from_slice(&x).ok()) - .unwrap_or_default() -} +// pub fn exif_data_image_to_query(mdi: ExifMetadata, object_id: object_id::Type) -> CreateUnchecked { +// CreateUnchecked { +// object_id, +// _params: vec![ +// camera_data::set(serde_json::to_vec(&mdi.camera_data).ok()), +// media_date::set(serde_json::to_vec(&mdi.date_taken).ok()), +// resolution::set(serde_json::to_vec(&mdi.resolution).ok()), +// media_location::set(serde_json::to_vec(&mdi.location).ok()), +// artist::set(mdi.artist), +// description::set(mdi.description), +// copyright::set(mdi.copyright), +// exif_version::set(mdi.exif_version), +// epoch_time::set(mdi.date_taken.map(|x| x.unix_timestamp())), +// ], +// } +// } + +// pub fn exif_data_image_to_query_params( +// mdi: ExifMetadata, +// ) -> (Vec<(&'static str, rmpv::Value)>, Vec) { +// use sd_sync::option_sync_db_entry; +// use sd_utils::chain_optional_iter; + +// chain_optional_iter( +// [], +// [ +// option_sync_db_entry!(serde_json::to_vec(&mdi.camera_data).ok(), camera_data), +// option_sync_db_entry!(serde_json::to_vec(&mdi.date_taken).ok(), media_date), +// option_sync_db_entry!(serde_json::to_vec(&mdi.location).ok(), media_location), +// option_sync_db_entry!(mdi.artist, artist), +// option_sync_db_entry!(mdi.description, description), +// option_sync_db_entry!(mdi.copyright, copyright), +// option_sync_db_entry!(mdi.exif_version, exif_version), +// ], +// ) +// .into_iter() +// .unzip() +// } + +// pub fn exif_media_data_from_prisma_data(data: sd_prisma::prisma::exif_data::Data) -> ExifMetadata { +// ExifMetadata { +// camera_data: from_slice_option_to_option(data.camera_data).unwrap_or_default(), +// date_taken: from_slice_option_to_option(data.media_date).unwrap_or_default(), +// resolution: from_slice_option_to_option(data.resolution).unwrap_or_default(), +// location: from_slice_option_to_option(data.media_location), +// artist: data.artist, +// description: data.description, +// copyright: data.copyright, +// exif_version: data.exif_version, +// } +// } + +// pub fn ffmpeg_data_from_prisma_data( +// object_with_media_data::ffmpeg_data::Data { +// formats, +// duration, +// start_time, +// bit_rate, +// metadata, +// chapters, +// programs, +// .. 
+// }: object_with_media_data::ffmpeg_data::Data, +// ) -> FFmpegMetadata { +// FFmpegMetadata { +// formats: formats.split(',').map(String::from).collect::>(), +// duration: duration.map(|duration| { +// let duration = ffmpeg_data_field_from_db(&duration); +// ((duration >> 32) as i32, duration as u32) +// }), +// start_time: start_time.map(|start_time| { +// let start_time = ffmpeg_data_field_from_db(&start_time); +// ((start_time >> 32) as i32, start_time as u32) +// }), +// bit_rate: { +// let bit_rate = ffmpeg_data_field_from_db(&bit_rate); +// ((bit_rate >> 32) as i32, bit_rate as u32) +// }, +// chapters: chapters +// .into_iter() +// .map( +// |ffmpeg_media_chapter::Data { +// chapter_id, +// start, +// end, +// time_base_den, +// time_base_num, +// metadata, +// .. +// }| Chapter { +// id: chapter_id, +// start: { +// let start = ffmpeg_data_field_from_db(&start); +// ((start >> 32) as i32, start as u32) +// }, +// end: { +// let end = ffmpeg_data_field_from_db(&end); +// ((end >> 32) as i32, end as u32) +// }, +// time_base_den, +// time_base_num, +// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), +// }, +// ) +// .collect(), +// programs: programs +// .into_iter() +// .map( +// |object_with_media_data::ffmpeg_data::programs::Data { +// program_id, +// name, +// metadata, +// streams, +// .. +// }| Program { +// id: program_id, +// name, +// streams: streams +// .into_iter() +// .map( +// |object_with_media_data::ffmpeg_data::programs::streams::Data { +// stream_id, +// name, +// aspect_ratio_num, +// aspect_ratio_den, +// frames_per_second_num, +// frames_per_second_den, +// time_base_real_den, +// time_base_real_num, +// dispositions, +// metadata, +// codec, +// .. +// }| { +// Stream { +// id: stream_id, +// name, +// codec: codec.map( +// |object_with_media_data::ffmpeg_data::programs::streams::codec::Data{ +// kind, +// sub_kind, +// tag, +// name, +// profile, +// bit_rate, +// audio_props, +// video_props, +// .. +// }| Codec { +// kind, +// sub_kind, +// tag, +// name, +// profile, +// bit_rate, +// props: match (audio_props, video_props) { +// ( +// Some(ffmpeg_media_audio_props::Data { +// delay, +// padding, +// sample_rate, +// sample_format, +// bit_per_sample, +// channel_layout, +// .. +// }), +// None, +// ) => Some(Props::Audio(AudioProps { +// delay, +// padding, +// sample_rate, +// sample_format, +// bit_per_sample, +// channel_layout, +// })), +// ( +// None, +// Some(ffmpeg_media_video_props::Data { +// pixel_format, +// color_range, +// bits_per_channel, +// color_space, +// color_primaries, +// color_transfer, +// field_order, +// chroma_location, +// width, +// height, +// aspect_ratio_num, +// aspect_ratio_den, +// properties, +// .. 
+// }), +// ) => Some(Props::Video(VideoProps { +// pixel_format, +// color_range, +// bits_per_channel, +// color_space, +// color_primaries, +// color_transfer, +// field_order, +// chroma_location, +// width, +// height, +// aspect_ratio_num, +// aspect_ratio_den, +// properties: properties +// .map(|dispositions| { +// dispositions +// .split(',') +// .map(String::from) +// .collect::>() +// }) +// .unwrap_or_default(), +// })), +// _ => None, +// }, +// } +// ), +// aspect_ratio_num, +// aspect_ratio_den, +// frames_per_second_num, +// frames_per_second_den, +// time_base_real_den, +// time_base_real_num, +// dispositions: dispositions +// .map(|dispositions| { +// dispositions +// .split(',') +// .map(String::from) +// .collect::>() +// }) +// .unwrap_or_default(), +// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), +// } +// }, +// ) +// .collect(), +// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), +// }, +// ) +// .collect(), +// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), +// } +// } + +// #[must_use] +// fn from_slice_option_to_option( +// value: Option>, +// ) -> Option { +// value +// .map(|x| serde_json::from_slice(&x).ok()) +// .unwrap_or_default() +// } diff --git a/core/src/object/media/old_thumbnail/mod.rs b/core/src/object/media/old_thumbnail/mod.rs index b55cf833dba4..604f7c750ead 100644 --- a/core/src/object/media/old_thumbnail/mod.rs +++ b/core/src/object/media/old_thumbnail/mod.rs @@ -1,205 +1,192 @@ -use crate::{library::LibraryId, util::version_manager::VersionManagerError, Node}; - -use sd_file_ext::extensions::{ - DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS, -}; -use sd_utils::error::FileIOError; - -#[cfg(feature = "ffmpeg")] -use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS}; - -use std::{ - path::{Path, PathBuf}, - time::Duration, -}; - -use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tokio::task; -use tracing::error; - -mod clean_up; -mod directory; -pub mod old_actor; -pub mod preferences; -mod process; -mod shard; -mod state; -mod worker; - -pub use process::{BatchToProcess, GenerateThumbnailArgs}; -pub use shard::get_shard_hex; - -use directory::ThumbnailVersion; - -// Files names constants -const THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails"; -const SAVE_STATE_FILE: &str = "thumbs_to_process.bin"; -const VERSION_FILE: &str = "version.txt"; -pub const WEBP_EXTENSION: &str = "webp"; -const EPHEMERAL_DIR: &str = "ephemeral"; - -/// This is the target pixel count for all thumbnails to be resized to, and it is eventually downscaled -/// to [`TARGET_QUALITY`]. -const TARGET_PX: f32 = 1048576.0; // 1024x1024 - -/// This is the target quality that we render thumbnails at, it is a float between 0-100 -/// and is treated as a percentage (so 60% in this case, or it's the same as multiplying by `0.6`). 
-const TARGET_QUALITY: f32 = 60.0; - -// Some time constants -const ONE_SEC: Duration = Duration::from_secs(1); -const THIRTY_SECS: Duration = Duration::from_secs(30); -const HALF_HOUR: Duration = Duration::from_secs(30 * 60); - -#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -pub enum ThumbnailKind { - Ephemeral, - Indexed(LibraryId), -} - -pub fn get_indexed_thumbnail_path(node: &Node, cas_id: &str, library_id: LibraryId) -> PathBuf { - get_thumbnail_path(node, cas_id, ThumbnailKind::Indexed(library_id)) -} - -pub fn get_ephemeral_thumbnail_path(node: &Node, cas_id: &str) -> PathBuf { - get_thumbnail_path(node, cas_id, ThumbnailKind::Ephemeral) -} - -/// This does not check if a thumbnail exists, it just returns the path that it would exist at -fn get_thumbnail_path(node: &Node, cas_id: &str, kind: ThumbnailKind) -> PathBuf { - let mut thumb_path = node.config.data_directory(); - - thumb_path.push(THUMBNAIL_CACHE_DIR_NAME); - match kind { - ThumbnailKind::Ephemeral => thumb_path.push(EPHEMERAL_DIR), - ThumbnailKind::Indexed(library_id) => { - thumb_path.push(library_id.to_string()); - } - } - thumb_path.push(get_shard_hex(cas_id)); - thumb_path.push(cas_id); - thumb_path.set_extension(WEBP_EXTENSION); - - thumb_path -} - -pub fn get_indexed_thumb_key(cas_id: &str, library_id: LibraryId) -> Vec { - get_thumb_key(cas_id, ThumbnailKind::Indexed(library_id)) -} - -pub fn get_ephemeral_thumb_key(cas_id: &str) -> Vec { - get_thumb_key(cas_id, ThumbnailKind::Ephemeral) -} - -// this is used to pass the relevant data to the frontend so it can request the thumbnail -// it supports extending the shard hex to support deeper directory structures in the future -fn get_thumb_key(cas_id: &str, kind: ThumbnailKind) -> Vec { - vec![ - match kind { - ThumbnailKind::Ephemeral => String::from(EPHEMERAL_DIR), - ThumbnailKind::Indexed(library_id) => library_id.to_string(), - }, - get_shard_hex(cas_id).to_string(), - cas_id.to_string(), - ] -} - -#[cfg(feature = "ffmpeg")] -pub(super) static THUMBNAILABLE_VIDEO_EXTENSIONS: Lazy> = Lazy::new(|| { - ALL_VIDEO_EXTENSIONS - .iter() - .cloned() - .filter(can_generate_thumbnail_for_video) - .map(Extension::Video) - .collect() -}); - -pub(super) static THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { - ALL_IMAGE_EXTENSIONS - .iter() - .cloned() - .filter(can_generate_thumbnail_for_image) - .map(Extension::Image) - .chain( - ALL_DOCUMENT_EXTENSIONS - .iter() - .cloned() - .filter(can_generate_thumbnail_for_document) - .map(Extension::Document), - ) - .collect() -}); - -pub(super) static ALL_THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { - #[cfg(feature = "ffmpeg")] - return THUMBNAILABLE_EXTENSIONS - .iter() - .cloned() - .chain(THUMBNAILABLE_VIDEO_EXTENSIONS.iter().cloned()) - .collect(); - - #[cfg(not(feature = "ffmpeg"))] - THUMBNAILABLE_EXTENSIONS.clone() -}); - -#[derive(Error, Debug)] -pub enum ThumbnailerError { - // Internal errors - #[error("database error: {0}")] - Database(#[from] prisma_client_rust::QueryError), - #[error(transparent)] - FileIO(#[from] FileIOError), - #[error(transparent)] - VersionManager(#[from] VersionManagerError), - #[error("failed to encode webp")] - WebPEncoding { path: Box, reason: String }, - #[error("error while converting the image")] - SdImages { - path: Box, - error: sd_images::Error, - }, - #[error("failed to execute converting task: {0}")] - Task(#[from] task::JoinError), - #[cfg(feature = "ffmpeg")] - #[error(transparent)] - FFmpeg(#[from] sd_ffmpeg::Error), - #[error("thumbnail generation timed out 
for {}", .0.display())] - TimedOut(Box), -} - -#[derive(Debug, Serialize, Deserialize, Clone, Copy)] -pub enum ThumbnailerEntryKind { - Image, - #[cfg(feature = "ffmpeg")] - Video, -} - -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct ThumbnailerMetadata { - pub created: u32, - pub skipped: u32, -} - -#[cfg(feature = "ffmpeg")] -pub const fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool { - use VideoExtension::*; - // File extensions that are specifically not supported by the thumbnailer - !matches!(video_extension, Mpg | Swf | M2v | Hevc | M2ts | Mts | Ts) -} - -pub const fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool { - use ImageExtension::*; - - matches!( - image_extension, - Jpg | Jpeg | Png | Webp | Gif | Svg | Heic | Heics | Heif | Heifs | Avif | Bmp | Ico - ) -} - -pub const fn can_generate_thumbnail_for_document(document_extension: &DocumentExtension) -> bool { - use DocumentExtension::*; - - matches!(document_extension, Pdf) -} +// use crate::{library::LibraryId, util::version_manager::VersionManagerError, Node}; + +// use sd_file_ext::extensions::{ +// DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS, +// }; +// use sd_utils::error::FileIOError; + +// #[cfg(feature = "ffmpeg")] +// use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS}; + +// use std::{ +// path::{Path, PathBuf}, +// time::Duration, +// }; + +// use once_cell::sync::Lazy; +// use serde::{Deserialize, Serialize}; +// use thiserror::Error; +// use tokio::task; +// use tracing::error; + +// mod clean_up; +// mod directory; +// pub mod old_actor; +// pub mod preferences; +// mod process; +// mod shard; +// mod state; +// mod worker; + +// pub use process::{BatchToProcess, GenerateThumbnailArgs}; +// pub use shard::get_shard_hex; + +// use directory::ThumbnailVersion; + +// // Files names constants +// const THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails"; +// const SAVE_STATE_FILE: &str = "thumbs_to_process.bin"; +// const VERSION_FILE: &str = "version.txt"; +// pub const WEBP_EXTENSION: &str = "webp"; +// const EPHEMERAL_DIR: &str = "ephemeral"; + +// /// This is the target pixel count for all thumbnails to be resized to, and it is eventually downscaled +// /// to [`TARGET_QUALITY`]. +// const TARGET_PX: f32 = 1048576.0; // 1024x1024 + +// /// This is the target quality that we render thumbnails at, it is a float between 0-100 +// /// and is treated as a percentage (so 60% in this case, or it's the same as multiplying by `0.6`). 
+// const TARGET_QUALITY: f32 = 60.0; + +// // Some time constants +// const ONE_SEC: Duration = Duration::from_secs(1); +// const THIRTY_SECS: Duration = Duration::from_secs(30); +// const HALF_HOUR: Duration = Duration::from_secs(30 * 60); + +// #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] +// pub enum ThumbnailKind { +// Ephemeral, +// Indexed(LibraryId), +// } + +// pub fn get_indexed_thumbnail_path(node: &Node, cas_id: &str, library_id: LibraryId) -> PathBuf { +// get_thumbnail_path(node, cas_id, ThumbnailKind::Indexed(library_id)) +// } + +// pub fn get_ephemeral_thumbnail_path(node: &Node, cas_id: &str) -> PathBuf { +// get_thumbnail_path(node, cas_id, ThumbnailKind::Ephemeral) +// } + +// /// This does not check if a thumbnail exists, it just returns the path that it would exist at +// fn get_thumbnail_path(node: &Node, cas_id: &str, kind: ThumbnailKind) -> PathBuf { +// let mut thumb_path = node.config.data_directory(); + +// thumb_path.push(THUMBNAIL_CACHE_DIR_NAME); +// match kind { +// ThumbnailKind::Ephemeral => thumb_path.push(EPHEMERAL_DIR), +// ThumbnailKind::Indexed(library_id) => { +// thumb_path.push(library_id.to_string()); +// } +// } +// thumb_path.push(get_shard_hex(cas_id)); +// thumb_path.push(cas_id); +// thumb_path.set_extension(WEBP_EXTENSION); + +// thumb_path +// } + +// pub fn get_indexed_thumb_key(cas_id: &str, library_id: LibraryId) -> Vec { +// get_thumb_key(cas_id, ThumbnailKind::Indexed(library_id)) +// } + +// pub fn get_ephemeral_thumb_key(cas_id: &str) -> Vec { +// get_thumb_key(cas_id, ThumbnailKind::Ephemeral) +// } + +// // this is used to pass the relevant data to the frontend so it can request the thumbnail +// // it supports extending the shard hex to support deeper directory structures in the future +// fn get_thumb_key(cas_id: &str, kind: ThumbnailKind) -> Vec { +// vec![ +// match kind { +// ThumbnailKind::Ephemeral => String::from(EPHEMERAL_DIR), +// ThumbnailKind::Indexed(library_id) => library_id.to_string(), +// }, +// get_shard_hex(cas_id).to_string(), +// cas_id.to_string(), +// ] +// } + +// #[cfg(feature = "ffmpeg")] +// pub(super) static THUMBNAILABLE_VIDEO_EXTENSIONS: Lazy> = Lazy::new(|| { +// ALL_VIDEO_EXTENSIONS +// .iter() +// .cloned() +// .filter(can_generate_thumbnail_for_video) +// .map(Extension::Video) +// .collect() +// }); + +// pub(super) static THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { +// ALL_IMAGE_EXTENSIONS +// .iter() +// .cloned() +// .filter(can_generate_thumbnail_for_image) +// .map(Extension::Image) +// .chain( +// ALL_DOCUMENT_EXTENSIONS +// .iter() +// .cloned() +// .filter(can_generate_thumbnail_for_document) +// .map(Extension::Document), +// ) +// .collect() +// }); + +// pub(super) static ALL_THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { +// #[cfg(feature = "ffmpeg")] +// return THUMBNAILABLE_EXTENSIONS +// .iter() +// .cloned() +// .chain(THUMBNAILABLE_VIDEO_EXTENSIONS.iter().cloned()) +// .collect(); + +// #[cfg(not(feature = "ffmpeg"))] +// THUMBNAILABLE_EXTENSIONS.clone() +// }); + +// #[derive(Error, Debug)] +// pub enum ThumbnailerError { +// // Internal errors +// #[error("database error: {0}")] +// Database(#[from] prisma_client_rust::QueryError), +// #[error(transparent)] +// FileIO(#[from] FileIOError), +// #[error(transparent)] +// VersionManager(#[from] VersionManagerError), +// #[error("failed to encode webp")] +// WebPEncoding { path: Box, reason: String }, +// #[error("error while converting the image")] +// SdImages { +// path: Box, +// error: 
sd_images::Error, +// }, +// #[error("failed to execute converting task: {0}")] +// Task(#[from] task::JoinError), +// #[cfg(feature = "ffmpeg")] +// #[error(transparent)] +// FFmpeg(#[from] sd_ffmpeg::Error), +// #[error("thumbnail generation timed out for {}", .0.display())] +// TimedOut(Box), +// } + +// #[cfg(feature = "ffmpeg")] +// pub const fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool { +// use VideoExtension::*; +// // File extensions that are specifically not supported by the thumbnailer +// !matches!(video_extension, Mpg | Swf | M2v | Hevc | M2ts | Mts | Ts) +// } + +// pub const fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool { +// use ImageExtension::*; + +// matches!( +// image_extension, +// Jpg | Jpeg | Png | Webp | Gif | Svg | Heic | Heics | Heif | Heifs | Avif | Bmp | Ico +// ) +// } + +// pub const fn can_generate_thumbnail_for_document(document_extension: &DocumentExtension) -> bool { +// use DocumentExtension::*; + +// matches!(document_extension, Pdf) +// } diff --git a/core/src/object/mod.rs b/core/src/object/mod.rs index 08c41f1a44b2..f71961a75da9 100644 --- a/core/src/object/mod.rs +++ b/core/src/object/mod.rs @@ -6,8 +6,8 @@ use specta::Type; pub mod cas; pub mod fs; pub mod media; -pub mod old_file_identifier; -pub mod old_orphan_remover; +// pub mod old_file_identifier; +// pub mod old_orphan_remover; pub mod tag; pub mod validation; diff --git a/core/src/object/old_file_identifier/mod.rs b/core/src/object/old_file_identifier/mod.rs index a9849542081b..cb94810827e0 100644 --- a/core/src/object/old_file_identifier/mod.rs +++ b/core/src/object/old_file_identifier/mod.rs @@ -22,10 +22,10 @@ use tokio::fs; use tracing::{error, trace}; use uuid::Uuid; -pub mod old_file_identifier_job; -mod shallow; +// pub mod old_file_identifier_job; +// mod shallow; -pub use shallow::*; +// pub use shallow::*; // we break these jobs into chunks of 100 to improve performance const CHUNK_SIZE: usize = 100; diff --git a/core/src/old_job/error.rs b/core/src/old_job/error.rs index 5cc4f960405e..2bc4ad87a5c8 100644 --- a/core/src/old_job/error.rs +++ b/core/src/old_job/error.rs @@ -1,8 +1,8 @@ use crate::{ location::{indexer::IndexerError, LocationError}, object::{ - fs::error::FileSystemJobsError, media::old_media_processor::MediaProcessorError, - old_file_identifier::FileIdentifierJobError, validation::ValidatorError, + fs::error::FileSystemJobsError, /*media::old_media_processor::MediaProcessorError,*/ + /*old_file_identifier::FileIdentifierJobError,*/ validation::ValidatorError, }, }; @@ -58,10 +58,10 @@ pub enum JobError { // Specific job errors #[error(transparent)] Indexer(#[from] IndexerError), - #[error(transparent)] - MediaProcessor(#[from] MediaProcessorError), - #[error(transparent)] - FileIdentifier(#[from] FileIdentifierJobError), + // #[error(transparent)] + // MediaProcessor(#[from] MediaProcessorError), + // #[error(transparent)] + // FileIdentifier(#[from] FileIdentifierJobError), #[error(transparent)] Validator(#[from] ValidatorError), #[error(transparent)] diff --git a/core/src/old_job/manager.rs b/core/src/old_job/manager.rs index 9d373c05cf4d..f27edea2c4a2 100644 --- a/core/src/old_job/manager.rs +++ b/core/src/old_job/manager.rs @@ -1,16 +1,16 @@ use crate::{ library::Library, - location::indexer::old_indexer_job::OldIndexerJobInit, + // location::indexer::old_indexer_job::OldIndexerJobInit, object::{ fs::{ old_copy::OldFileCopierJobInit, old_cut::OldFileCutterJobInit, old_delete::OldFileDeleterJobInit, 
old_erase::OldFileEraserJobInit, }, - media::old_media_processor::OldMediaProcessorJobInit, - old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, + // media::old_media_processor::OldMediaProcessorJobInit, + // old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, validation::old_validator_job::OldObjectValidatorJobInit, }, - old_job::{worker::Worker, DynJob, Job, JobError}, + old_job::{worker::Worker, DynJob, JobError, OldJob}, Node, }; @@ -102,7 +102,7 @@ impl OldJobs { self: Arc, node: &Arc, library: &Arc, - job: Box>, + job: Box>, ) -> Result<(), JobManagerError> { let job_hash = job.hash(); @@ -387,7 +387,7 @@ fn initialize_resumable_job( ) -> Result, JobError> { dispatch_call_to_job_by_name!( job_report.name.as_str(), - T -> Job::::new_from_report(job_report, next_jobs), + T -> OldJob::::new_from_report(job_report, next_jobs), default = { error!( "Unknown job type: {}, id: {}", @@ -396,9 +396,9 @@ fn initialize_resumable_job( Err(JobError::UnknownJobName(job_report.id, job_report.name)) }, jobs = [ - OldMediaProcessorJobInit, - OldIndexerJobInit, - OldFileIdentifierJobInit, + // OldMediaProcessorJobInit, + // OldIndexerJobInit, + // OldFileIdentifierJobInit, OldObjectValidatorJobInit, OldFileCutterJobInit, OldFileCopierJobInit, diff --git a/core/src/old_job/mod.rs b/core/src/old_job/mod.rs index ab22d16725bd..9bb7d86782e8 100644 --- a/core/src/old_job/mod.rs +++ b/core/src/old_job/mod.rs @@ -156,8 +156,8 @@ pub struct JobBuilder { } impl JobBuilder { - pub fn build(self) -> Box> { - Box::new(Job:: { + pub fn build(self) -> Box> { + Box::new(OldJob:: { id: self.id, hash: ::hash(&self.init), report: Some(self.report_builder.build()), @@ -197,7 +197,7 @@ impl JobBuilder { } } -pub struct Job { +pub struct OldJob { id: Uuid, hash: u64, report: Option, @@ -205,7 +205,7 @@ pub struct Job { next_jobs: VecDeque>, } -impl Job { +impl OldJob { pub fn new(init: SJob) -> Box { JobBuilder::new(init).build() } @@ -435,7 +435,7 @@ impl From> for JobStepOutput DynJob for Job { +impl DynJob for OldJob { fn id(&self) -> Uuid { // SAFETY: This method is using during queueing, so we still have a report self.report() diff --git a/crates/ai/src/old_image_labeler/process.rs b/crates/ai/src/old_image_labeler/process.rs index 8e674f3be6cd..125dbe21c215 100644 --- a/crates/ai/src/old_image_labeler/process.rs +++ b/crates/ai/src/old_image_labeler/process.rs @@ -85,7 +85,7 @@ pub(super) async fn spawned_processing( let mut queue = file_paths .into_iter() .filter_map(|file_path| { - if file_path.object_id.is_none() { + if file_path.object.is_none() { errors.push(( file_path.id, ImageLabelerError::IsolateFilePathData(MissingFieldError::new( @@ -201,7 +201,7 @@ pub(super) async fn spawned_processing( let ids = ( file_path.id, - file_path.object_id.expect("already checked above"), + file_path.object.as_ref().expect("already checked above").id, ); if output_tx.is_closed() { From d11ecb794096ab16f7126aeb99fb0329b66c081e Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Sun, 12 May 2024 03:02:35 -0300 Subject: [PATCH 02/33] Scan location roughly working, a ton of stuff to fix yet --- apps/desktop/src/platform.ts | 6 +- .../src/components/explorer/FileThumb.tsx | 15 +- apps/mobile/src/stores/explorerStore.ts | 8 +- apps/web/src/App.tsx | 6 +- .../heavy-lifting/src/file_identifier/job.rs | 4 +- .../heavy-lifting/src/file_identifier/mod.rs | 4 +- .../src/file_identifier/shallow.rs | 12 +- .../tasks/extract_file_metadata.rs | 6 +- core/crates/heavy-lifting/src/indexer/mod.rs | 10 +- 
.../heavy-lifting/src/indexer/shallow.rs | 12 +- .../heavy-lifting/src/job_system/job.rs | 62 +++- .../heavy-lifting/src/job_system/mod.rs | 26 +- .../heavy-lifting/src/job_system/report.rs | 11 +- .../heavy-lifting/src/job_system/runner.rs | 135 ++++--- core/crates/heavy-lifting/src/lib.rs | 2 +- .../heavy-lifting/src/media_processor/job.rs | 64 +++- .../heavy-lifting/src/media_processor/mod.rs | 14 +- .../src/media_processor/shallow.rs | 13 +- core/src/api/jobs.rs | 119 +++--- core/src/api/libraries.rs | 9 +- core/src/api/locations.rs | 19 +- core/src/api/nodes.rs | 7 +- core/src/api/search/mod.rs | 2 +- core/src/context.rs | 27 +- core/src/lib.rs | 17 +- core/src/location/manager/mod.rs | 5 + core/src/location/manager/watcher/utils.rs | 18 +- core/src/location/mod.rs | 218 ++++++----- core/src/object/media/mod.rs | 6 +- core/src/old_job/error.rs | 6 +- core/src/util/debug_initializer.rs | 3 + crates/task-system/src/system.rs | 18 +- crates/task-system/src/task.rs | 28 +- .../$libraryId/Explorer/FilePath/Thumb.tsx | 4 +- interface/app/$libraryId/Explorer/store.ts | 7 +- .../$libraryId/settings/client/general.tsx | 11 +- interface/app/index.tsx | 6 +- interface/util/Platform.tsx | 4 +- packages/client/src/core.ts | 343 +++++++++--------- packages/client/src/lib/explorerItem.ts | 10 +- 40 files changed, 788 insertions(+), 509 deletions(-) diff --git a/apps/desktop/src/platform.ts b/apps/desktop/src/platform.ts index 8956eff29211..288e887df5fa 100644 --- a/apps/desktop/src/platform.ts +++ b/apps/desktop/src/platform.ts @@ -45,9 +45,11 @@ function constructServerUrl(urlSuffix: string) { export const platform = { platform: 'tauri', - getThumbnailUrlByThumbKey: (keyParts) => + getThumbnailUrlByThumbKey: (thumbKey) => constructServerUrl( - `/thumbnail/${keyParts.map((i) => encodeURIComponent(i)).join('/')}.webp` + `/thumbnail/${encodeURIComponent( + thumbKey.base_directory_str + )}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent(thumbKey.cas_id)}.webp` ), getFileUrl: (libraryId, locationLocalId, filePathId) => constructServerUrl(`/file/${libraryId}/${locationLocalId}/${filePathId}`), diff --git a/apps/mobile/src/components/explorer/FileThumb.tsx b/apps/mobile/src/components/explorer/FileThumb.tsx index f991e0df38a8..897cc19d417b 100644 --- a/apps/mobile/src/components/explorer/FileThumb.tsx +++ b/apps/mobile/src/components/explorer/FileThumb.tsx @@ -1,24 +1,25 @@ import { DocumentDirectoryPath } from '@dr.pogodin/react-native-fs'; import { getIcon } from '@sd/assets/util'; +import { Image } from 'expo-image'; +import { useEffect, useLayoutEffect, useMemo, useState, type PropsWithChildren } from 'react'; +import { View } from 'react-native'; import { getExplorerItemData, getItemFilePath, getItemLocation, isDarkTheme, + ThumbKey, type ExplorerItem } from '@sd/client'; -import { Image } from 'expo-image'; -import { useEffect, useLayoutEffect, useMemo, useState, type PropsWithChildren } from 'react'; -import { View } from 'react-native'; import { flattenThumbnailKey, useExplorerStore } from '~/stores/explorerStore'; import { tw } from '../../lib/tailwind'; // NOTE: `file://` is required for Android to load local files! 
-export const getThumbnailUrlByThumbKey = (thumbKey: string[]) => { - return `file://${DocumentDirectoryPath}/thumbnails/${thumbKey - .map((i) => encodeURIComponent(i)) - .join('/')}.webp`; +export const getThumbnailUrlByThumbKey = (thumbKey: ThumbKey) => { + return `file://${DocumentDirectoryPath}/thumbnails/${encodeURIComponent( + thumbKey.base_directory_str + )}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent(thumbKey.cas_id)}.webp`; }; const FileThumbWrapper = ({ children, size = 1 }: PropsWithChildren<{ size: number }>) => ( diff --git a/apps/mobile/src/stores/explorerStore.ts b/apps/mobile/src/stores/explorerStore.ts index ca9bc9efad2f..a5019fb39950 100644 --- a/apps/mobile/src/stores/explorerStore.ts +++ b/apps/mobile/src/stores/explorerStore.ts @@ -1,4 +1,4 @@ -import { resetStore } from '@sd/client'; +import { ThumbKey, resetStore } from '@sd/client'; import { proxy, useSnapshot } from 'valtio'; import { proxySet } from 'valtio/utils'; @@ -26,14 +26,14 @@ const state = { orderDirection: 'Asc' as 'Asc' | 'Desc' }; -export function flattenThumbnailKey(thumbKey: string[]) { - return thumbKey.join('/'); +export function flattenThumbnailKey(thumbKey: ThumbKey) { + return `${thumbKey.base_directory_str}/${thumbKey.shard_hex}/${thumbKey.cas_id}`; } const store = proxy({ ...state, reset: () => resetStore(store, state), - addNewThumbnail: (thumbKey: string[]) => { + addNewThumbnail: (thumbKey: ThumbKey) => { store.newThumbnails.add(flattenThumbnailKey(thumbKey)); }, // this should be done when the explorer query is refreshed diff --git a/apps/web/src/App.tsx b/apps/web/src/App.tsx index 1619416a811b..6da60257dec8 100644 --- a/apps/web/src/App.tsx +++ b/apps/web/src/App.tsx @@ -42,8 +42,10 @@ const spacedriveURL = (() => { const platform: Platform = { platform: 'web', - getThumbnailUrlByThumbKey: (keyParts) => - `${spacedriveURL}/thumbnail/${keyParts.map((i) => encodeURIComponent(i)).join('/')}.webp`, + getThumbnailUrlByThumbKey: (thumbKey) => + `${spacedriveURL}/thumbnail/${encodeURIComponent( + thumbKey.base_directory_str + )}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent(thumbKey.cas_id)}.webp`, getFileUrl: (libraryId, locationLocalId, filePathId) => `${spacedriveURL}/file/${encodeURIComponent(libraryId)}/${encodeURIComponent( locationLocalId diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 490b43fd7d4d..fd73854eca54 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -35,7 +35,7 @@ use futures_concurrency::future::TryJoin; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::warn; +use tracing::{trace, warn}; use super::{ orphan_path_filters_deep, orphan_path_filters_shallow, @@ -430,6 +430,8 @@ impl FileIdentifier { .exec() .await?; + trace!("Found {} orphan paths", orphan_paths.len()); + if orphan_paths.is_empty() { break; } diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index fdb35795fa09..996cd0e3aef5 100644 --- a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -141,7 +141,7 @@ fn orphan_path_filters_shallow( )), file_path::size_in_bytes_bytes::not(Some(0u64.to_be_bytes().to_vec())), ], - [file_path_id.map(file_path::id::gte)], + [file_path_id.map(file_path::id::gt)], ) } @@ -162,7 +162,7 @@ fn 
orphan_path_filters_deep( ], [ // this is a workaround for the cursor not working properly - file_path_id.map(file_path::id::gte), + file_path_id.map(file_path::id::gt), maybe_sub_iso_file_path.as_ref().map(|sub_iso_file_path| { file_path::materialized_path::starts_with( sub_iso_file_path diff --git a/core/crates/heavy-lifting/src/file_identifier/shallow.rs b/core/crates/heavy-lifting/src/file_identifier/shallow.rs index dbbedb2c20e3..2faedb60077f 100644 --- a/core/crates/heavy-lifting/src/file_identifier/shallow.rs +++ b/core/crates/heavy-lifting/src/file_identifier/shallow.rs @@ -32,8 +32,8 @@ use super::{ pub async fn shallow( location: location::Data, sub_path: impl AsRef + Send, - dispatcher: BaseTaskDispatcher, - ctx: impl OuterContext, + dispatcher: &BaseTaskDispatcher, + ctx: &impl OuterContext, ) -> Result, Error> { let sub_path = sub_path.as_ref(); let db = ctx.db(); @@ -87,7 +87,7 @@ pub async fn shallow( orphans_count += orphan_paths.len() as u64; last_orphan_file_path_id = Some(last_orphan.id); - pending_running_tasks.insert(CancelTaskOnDrop( + pending_running_tasks.insert(CancelTaskOnDrop::new( dispatcher .dispatch(ExtractFileMetadataTask::new( Arc::clone(&location), @@ -115,8 +115,8 @@ pub async fn shallow( async fn process_tasks( pending_running_tasks: FutureGroup>, - dispatcher: BaseTaskDispatcher, - ctx: impl OuterContext, + dispatcher: &BaseTaskDispatcher, + ctx: &impl OuterContext, ) -> Result, Error> { let mut pending_running_tasks = pending_running_tasks.lend_mut(); @@ -141,7 +141,7 @@ async fn process_tasks( errors.extend(more_errors); if !identified_files.is_empty() { - pending_running_tasks.insert(CancelTaskOnDrop( + pending_running_tasks.insert(CancelTaskOnDrop::new( dispatcher .dispatch(ObjectProcessorTask::new( identified_files, diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs index f8dd41fdcd33..06b57e98d3d8 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs @@ -21,7 +21,7 @@ use futures::stream::{self, FuturesUnordered, StreamExt}; use futures_concurrency::stream::Merge; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::error; +use tracing::{error, trace}; use uuid::Uuid; use super::IdentifiedFile; @@ -95,6 +95,7 @@ impl Task for ExtractFileMetadataTask { } let Self { + id, location, location_path, file_paths_by_id, @@ -139,6 +140,8 @@ impl Task for ExtractFileMetadataTask { .remove(&file_path_pub_id) .expect("file_path must be here"); + trace!("Processed file , {} files remaining", file_paths_by_id.len()); + match res { Ok(FileMetadata { cas_id, kind, .. 
}) => { identified_files.insert( @@ -168,6 +171,7 @@ impl Task for ExtractFileMetadataTask { } StreamMessage::Interrupt(kind) => { + trace!("Task received interrupt {kind:?}: "); *extract_metadata_time += start_time.elapsed(); return Ok(match kind { InterruptionKind::Pause => ExecStatus::Paused, diff --git a/core/crates/heavy-lifting/src/indexer/mod.rs b/core/crates/heavy-lifting/src/indexer/mod.rs index 78b9d3827f66..3d2cb8a49475 100644 --- a/core/crates/heavy-lifting/src/indexer/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/mod.rs @@ -27,7 +27,7 @@ use std::{ }; use itertools::Itertools; -use prisma_client_rust::{operator::or, Select}; +use prisma_client_rust::{operator::or, QueryError, Select}; use rspc::ErrorCode; use serde::{Deserialize, Serialize}; use specta::Type; @@ -53,8 +53,8 @@ pub enum Error { SubPath(#[from] sub_path::Error), // Internal Errors - #[error("database Error: {0}")] - Database(#[from] prisma_client_rust::QueryError), + #[error("database error: {0}")] + Database(#[from] QueryError), #[error(transparent)] FileIO(#[from] FileIOError), #[error(transparent)] @@ -241,7 +241,7 @@ async fn remove_non_existing_file_paths( } #[allow(clippy::missing_panics_doc)] // Can't actually panic as we only deal with directories -async fn reverse_update_directories_sizes( +pub async fn reverse_update_directories_sizes( base_path: impl AsRef + Send, location_id: location::id::Type, location_path: impl AsRef + Send, @@ -345,7 +345,7 @@ async fn compute_sizes( pub_id_by_ancestor_materialized_path: &mut HashMap, db: &PrismaClient, errors: &mut Vec, -) -> Result<(), Error> { +) -> Result<(), QueryError> { db.file_path() .find_many(vec![ file_path::location_id::equals(Some(location_id)), diff --git a/core/crates/heavy-lifting/src/indexer/shallow.rs b/core/crates/heavy-lifting/src/indexer/shallow.rs index 085b6f1a50ce..c238998ffb4c 100644 --- a/core/crates/heavy-lifting/src/indexer/shallow.rs +++ b/core/crates/heavy-lifting/src/indexer/shallow.rs @@ -33,8 +33,8 @@ use super::{ pub async fn shallow( location: location_with_indexer_rules::Data, sub_path: impl AsRef + Send, - dispatcher: BaseTaskDispatcher, - ctx: impl OuterContext, + dispatcher: &BaseTaskDispatcher, + ctx: &impl OuterContext, ) -> Result, Error> { let sub_path = sub_path.as_ref(); let db = ctx.db(); @@ -64,7 +64,7 @@ pub async fn shallow( Arc::clone(&location_path), Arc::clone(&to_walk_path), Arc::clone(db), - &dispatcher, + dispatcher, ) .await? else { @@ -82,7 +82,7 @@ pub async fn shallow( to_update, Arc::clone(db), Arc::clone(sync), - &dispatcher, + dispatcher, ) .await? else { @@ -109,7 +109,7 @@ pub async fn shallow( .await?; } - update_location_size(location.id, db, &ctx).await?; + update_location_size(location.id, db, ctx).await?; } if indexed_count > 0 || removed_count > 0 { @@ -222,7 +222,7 @@ async fn save_and_update( .dispatch_many_boxed(save_and_update_tasks) .await .into_iter() - .map(CancelTaskOnDrop) + .map(CancelTaskOnDrop::new) .collect::>() .try_join() .await? 
diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index b0e5132cfa98..2ff647e0dda1 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -31,7 +31,7 @@ use tokio::{ spawn, sync::{watch, Mutex}, }; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, trace, warn}; use uuid::Uuid; use super::{ @@ -52,6 +52,7 @@ pub enum JobName { // TODO: Add more job names as needed } +#[derive(Debug)] pub enum ReturnStatus { Completed(JobReturn), Shutdown(Result>, rmp_serde::encode::Error>), @@ -147,7 +148,7 @@ where } } -impl IntoJob for JobBuilder +impl IntoJob for JobEnqueuer where J: Job + SerializableJob, OuterCtx: OuterContext, @@ -276,7 +277,7 @@ pub enum JobOutputData { // TODO: Add more types } -pub struct JobBuilder +pub struct JobEnqueuer where J: Job + SerializableJob, OuterCtx: OuterContext, @@ -289,19 +290,19 @@ where _ctx: PhantomData, } -impl JobBuilder +impl JobEnqueuer where J: Job + SerializableJob, OuterCtx: OuterContext, JobCtx: JobContext, { - pub fn build(self) -> Box> { + fn build(self) -> Box> { Box::new(JobHolder { id: self.id, job: self.job, report: self.report_builder.build(), - next_jobs: VecDeque::new(), - _ctx: PhantomData, + next_jobs: self.next_jobs, + _ctx: self._ctx, }) } @@ -338,7 +339,7 @@ where pub fn enqueue_next(mut self, next: impl Job + SerializableJob) -> Self { let next_job_order = self.next_jobs.len() + 1; - let mut child_job_builder = JobBuilder::new(next).with_parent_id(self.id); + let mut child_job_builder = JobEnqueuer::new(next).with_parent_id(self.id); if let Some(parent_action) = &self.report_builder.action { child_job_builder = @@ -365,6 +366,7 @@ where } pub struct JobHandle> { + pub(crate) id: JobId, pub(crate) next_jobs: VecDeque>>, pub(crate) ctx: JobCtx, pub(crate) commands_tx: chan::Sender, @@ -372,6 +374,10 @@ pub struct JobHandle> { impl> JobHandle { pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> { + trace!( + "Handle sending command {command:?} to ", + self.id + ); if self.commands_tx.send(command).await.is_err() { warn!("Tried to send a {command:?} to a job that was already completed"); @@ -395,6 +401,11 @@ impl> JobHandle", + next_job_report.id + ); + next_job_report.update(self.ctx.db()).await }) .collect::>() @@ -408,6 +419,8 @@ impl> JobHandle, ) -> Result<(), JobSystemError> { + trace!("Handle registering start of ", self.id); + let Self { next_jobs, ctx, .. 
} = self; let mut report = ctx.report_mut().await; @@ -432,6 +445,10 @@ impl> JobHandle", + next_job_report.id + ); if next_job_report.created_at.is_none() { next_job_report.create(db).await } else { @@ -453,14 +470,20 @@ impl> JobHandle", self.id); + let output = JobOutput::prepare_output_and_report(job_return, &mut report); report.update(ctx.db()).await?; + trace!("Handle completed ", self.id); + Ok(output) } pub async fn failed_job(&mut self, e: &Error) -> Result<(), JobSystemError> { + trace!("Handle registering failed job ", self.id); + let db = self.ctx.db(); { let mut report = self.ctx.report_mut().await; @@ -477,10 +500,17 @@ impl> JobHandle", + self.id + ); + self.command_children(Command::Cancel).await } pub async fn shutdown_pause_job(&mut self) -> Result<(), JobSystemError> { + trace!("Handle pausing job on shutdown: ", self.id); + let db = self.ctx.db(); { @@ -500,6 +530,7 @@ impl> JobHandle Result<(), JobSystemError> { + trace!("Handle canceling job: ", self.id); let db = self.ctx.db(); { @@ -516,6 +547,11 @@ impl> JobHandle", + self.id + ); + self.command_children(Command::Cancel).await } } @@ -602,6 +638,8 @@ where let ctx = JobCtx::new(self.report, ctx); + trace!("Dispatching job ", self.id); + spawn(to_spawn_job::( self.id, self.job, @@ -613,6 +651,7 @@ where )); JobHandle { + id: self.id, next_jobs: self.next_jobs, ctx, commands_tx, @@ -630,6 +669,8 @@ where let ctx = JobCtx::new(self.report, ctx); + trace!("Resuming job ", self.id); + spawn(to_spawn_job::( self.id, self.job, @@ -641,6 +682,7 @@ where )); JobHandle { + id: self.id, next_jobs: self.next_jobs, ctx, commands_tx, @@ -698,6 +740,7 @@ async fn to_spawn_job>( match command { Command::Pause => { + trace!("Pausing job ", id); running_state_tx.send_modify(|state| *state = JobRunningState::Paused); remote_controllers .iter() @@ -715,6 +758,7 @@ async fn to_spawn_job>( }); } Command::Resume => { + trace!("Resuming job ", id); running_state_tx.send_modify(|state| *state = JobRunningState::Running); remote_controllers @@ -733,6 +777,7 @@ async fn to_spawn_job>( }); } Command::Cancel => { + trace!("Canceling job ", id); remote_controllers .iter() .map(TaskRemoteController::cancel) @@ -749,6 +794,7 @@ async fn to_spawn_job>( } StreamMessage::Done(res) => { + trace!("Job done", id); #[cfg(debug_assertions)] { // Just a sanity check to make sure we don't have any pending tasks left diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs index 140425b72244..c1967876377a 100644 --- a/core/crates/heavy-lifting/src/job_system/mod.rs +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -7,6 +7,7 @@ use sd_utils::error::FileIOError; use std::{ cell::RefCell, collections::hash_map::HashMap, + panic, path::{Path, PathBuf}, sync::Arc, }; @@ -15,7 +16,7 @@ use async_channel as chan; use futures::Stream; use futures_concurrency::future::{Join, TryJoin}; use tokio::{fs, spawn, sync::oneshot, task::JoinHandle}; -use tracing::{error, info, trace, warn}; +use tracing::{debug, error, info, trace, warn}; use uuid::Uuid; mod error; @@ -176,7 +177,7 @@ impl> JobSystem>( - &mut self, + &self, job: impl IntoJob + Send, location_id: location::id::Type, ctx: OuterCtx, @@ -254,13 +255,20 @@ async fn load_stored_job_entries>>( - &fs::read(store_jobs_file).await.map_err(|e| { - JobSystemError::StoredJobs(FileIOError::from(( - store_jobs_file, - e, - "Failed to load jobs from disk", - ))) - })?, + &match fs::read(store_jobs_file).await { + Ok(bytes) => bytes, + Err(e) if e.kind() == 
std::io::ErrorKind::NotFound => { + debug!("No pending jobs found on disk"); + return Ok(()); + } + Err(e) => { + return Err(JobSystemError::StoredJobs(FileIOError::from(( + store_jobs_file, + e, + "Failed to load jobs from disk", + )))) + } + }, )?; stores_jobs_by_db diff --git a/core/crates/heavy-lifting/src/job_system/report.rs b/core/crates/heavy-lifting/src/job_system/report.rs index dbb9af221c73..f6d9dab17cbd 100644 --- a/core/crates/heavy-lifting/src/job_system/report.rs +++ b/core/crates/heavy-lifting/src/job_system/report.rs @@ -1,7 +1,7 @@ -use sd_prisma::prisma::{job, PrismaClient}; +use sd_prisma::prisma::{job, location, PrismaClient}; use sd_utils::db::{maybe_missing, MissingFieldError}; -use std::{collections::HashMap, fmt, str::FromStr}; +use std::{collections::HashMap, fmt, path::PathBuf, str::FromStr}; use chrono::{DateTime, Utc}; use prisma_client_rust::QueryError; @@ -62,14 +62,15 @@ pub enum ReportMetadata { #[derive(Debug, Serialize, Deserialize, Type, Clone)] pub enum ReportInputMetadata { - Placeholder, - // TODO: Add more types + // TODO: Add more variants as needed + Location(location::Data), + SubPath(PathBuf), } #[derive(Debug, Serialize, Deserialize, Type, Clone)] pub enum ReportOutputMetadata { Metrics(HashMap), - // TODO: Add more types + // TODO: Add more variants as needed } #[derive(Debug, Serialize, Type, Clone)] diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index 59dc555d13e5..b2476f3a7877 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -22,7 +22,7 @@ use tokio::{ time::{interval_at, Instant}, }; use tokio_stream::wrappers::IntervalStream; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, info, trace, warn}; use uuid::Uuid; use super::{ @@ -64,14 +64,18 @@ pub(super) enum RunnerMessage> { - base_dispatcher: BaseTaskDispatcher, - handles: HashMap>, +struct JobsWorktables { job_hashes: HashMap, job_hashes_by_id: HashMap, running_jobs_by_job_id: HashMap, running_jobs_set: HashSet<(JobName, location::id::Type)>, jobs_to_store_by_ctx_id: HashMap>, +} + +pub(super) struct JobSystemRunner> { + base_dispatcher: BaseTaskDispatcher, + handles: HashMap>, + worktables: JobsWorktables, job_return_status_tx: chan::Sender<(JobId, Result)>, job_outputs_tx: chan::Sender<(JobId, Result)>, } @@ -85,11 +89,13 @@ impl> JobSystemRunner> JobSystemRunner> JobSystemRunner bool { - self.handles.is_empty() && self.job_hashes.is_empty() && self.job_hashes_by_id.is_empty() + self.handles.is_empty() + && self.worktables.job_hashes.is_empty() + && self.worktables.job_hashes_by_id.is_empty() } fn check_if_job_are_running( @@ -205,40 +217,46 @@ impl> JobSystemRunner, location_id: location::id::Type, ) -> bool { - job_names - .into_iter() - .any(|job_name| self.running_jobs_set.contains(&(job_name, location_id))) + job_names.into_iter().any(|job_name| { + self.worktables + .running_jobs_set + .contains(&(job_name, location_id)) + }) } async fn process_return_status(&mut self, job_id: JobId, status: Result) { let Self { handles, - job_hashes, - job_hashes_by_id, + worktables, job_outputs_tx, job_return_status_tx, base_dispatcher, - jobs_to_store_by_ctx_id, - running_jobs_by_job_id, - running_jobs_set, .. 
} = self; - let job_hash = job_hashes_by_id.remove(&job_id).expect("it must be here"); - let (job_name, location_id) = running_jobs_by_job_id + let job_hash = worktables + .job_hashes_by_id + .remove(&job_id) + .expect("it must be here"); + + let (job_name, location_id) = worktables + .running_jobs_by_job_id .remove(&job_id) .expect("a JobName and location_id must've been inserted in the map with the job id"); - assert!(running_jobs_set.remove(&(job_name, location_id))); - assert!(job_hashes.remove(&job_hash).is_some()); + assert!(worktables.running_jobs_set.remove(&(job_name, location_id))); + + assert!(worktables.job_hashes.remove(&job_hash).is_some()); let mut handle = handles.remove(&job_id).expect("it must be here"); let res = match status { Ok(ReturnStatus::Completed(job_return)) => { + trace!("Job completed and will try to dispatch children jobs: "); try_dispatch_next_job( &mut handle, + location_id, base_dispatcher.clone(), - (job_hashes, job_hashes_by_id), + worktables, handles, job_return_status_tx.clone(), ); @@ -280,7 +298,8 @@ impl> JobSystemRunner> JobSystemRunner JOBS_INITIAL_CAPACITY - && self.job_hashes.len() < JOBS_INITIAL_CAPACITY + if self.worktables.job_hashes.capacity() > JOBS_INITIAL_CAPACITY + && self.worktables.job_hashes.len() < JOBS_INITIAL_CAPACITY { - self.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY); + self.worktables.job_hashes.shrink_to(JOBS_INITIAL_CAPACITY); } - if self.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY - && self.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY + if self.worktables.job_hashes_by_id.capacity() > JOBS_INITIAL_CAPACITY + && self.worktables.job_hashes_by_id.len() < JOBS_INITIAL_CAPACITY { - self.job_hashes_by_id.shrink_to(JOBS_INITIAL_CAPACITY); + self.worktables + .job_hashes_by_id + .shrink_to(JOBS_INITIAL_CAPACITY); } - if self.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY - && self.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY + if self.worktables.running_jobs_by_job_id.capacity() > JOBS_INITIAL_CAPACITY + && self.worktables.running_jobs_by_job_id.len() < JOBS_INITIAL_CAPACITY { - self.running_jobs_by_job_id.shrink_to(JOBS_INITIAL_CAPACITY); + self.worktables + .running_jobs_by_job_id + .shrink_to(JOBS_INITIAL_CAPACITY); } - if self.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY - && self.running_jobs_set.len() < JOBS_INITIAL_CAPACITY + if self.worktables.running_jobs_set.capacity() > JOBS_INITIAL_CAPACITY + && self.worktables.running_jobs_set.len() < JOBS_INITIAL_CAPACITY { - self.running_jobs_set.shrink_to(JOBS_INITIAL_CAPACITY); + self.worktables + .running_jobs_set + .shrink_to(JOBS_INITIAL_CAPACITY); } } @@ -363,9 +388,13 @@ impl> JobSystemRunner> JobSystemRunner>( handle: &mut JobHandle, + location_id: location::id::Type, base_dispatcher: BaseTaskDispatcher, - (job_hashes, job_hashes_by_id): (&mut HashMap, &mut HashMap), + JobsWorktables { + job_hashes, + job_hashes_by_id, + running_jobs_by_job_id, + running_jobs_set, + .. 
+ }: &mut JobsWorktables, handles: &mut HashMap>, job_return_status_tx: chan::Sender<(JobId, Result)>, ) { if let Some(next) = handle.next_jobs.pop_front() { let next_id = next.id(); let next_hash = next.hash(); + let next_name = next.job_name(); + if let Entry::Vacant(e) = job_hashes.entry(next_hash) { e.insert(next_id); + trace!( + "Dispatching next job: ", + next.job_name() + ); job_hashes_by_id.insert(next_id, next_hash); + running_jobs_by_job_id.insert(next_id, (next_name, location_id)); + running_jobs_set.insert((next_name, location_id)); let mut next_handle = next.dispatch( base_dispatcher, handle.ctx.get_outer_ctx(), @@ -419,6 +463,8 @@ fn try_dispatch_next_job>( } else { warn!("Unexpectedly found a job with the same hash as the next job: ", next.job_name()); } + } else { + trace!("No next jobs to dispatch"); } } @@ -449,6 +495,7 @@ pub(super) async fn run>( match msg { // Job return status messages StreamMessage::ReturnStatus((job_id, status)) => { + trace!("Received return status for job: "); runner.process_return_status(job_id, status).await; } diff --git a/core/crates/heavy-lifting/src/lib.rs b/core/crates/heavy-lifting/src/lib.rs index 611815c3bbbb..b9401559fd94 100644 --- a/core/crates/heavy-lifting/src/lib.rs +++ b/core/crates/heavy-lifting/src/lib.rs @@ -45,7 +45,7 @@ use media_processor::ThumbKey; pub use job_system::{ job::{ - IntoJob, JobBuilder, JobContext, JobName, JobOutput, JobOutputData, OuterContext, + IntoJob, JobContext, JobEnqueuer, JobName, JobOutput, JobOutputData, OuterContext, ProgressUpdate, }, JobId, JobSystem, JobSystemError, diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 896d0626bcf4..150cdff33931 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -15,7 +15,7 @@ use sd_core_prisma_helpers::file_path_for_media_processor; use sd_core_sync::Manager as SyncManager; use sd_file_ext::extensions::Extension; -use sd_prisma::prisma::{location, PrismaClient}; +use sd_prisma::prisma::{file_path, location, object, PrismaClient}; use sd_task_system::{ AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskOutput, TaskStatus, @@ -720,17 +720,66 @@ async fn get_all_children_files_by_extensions( parent_iso_file_path: &IsolatedFilePathData<'_>, extensions: &[Extension], ) -> Result, media_processor::Error> { + #[derive(Deserialize)] + struct RawFilePathForMediaProcessor { + id: file_path::id::Type, + materialized_path: file_path::materialized_path::Type, + is_dir: file_path::is_dir::Type, + name: file_path::name::Type, + extension: file_path::extension::Type, + cas_id: file_path::cas_id::Type, + object_id: object::id::Type, + object_pub_id: object::pub_id::Type, + } + + impl From for file_path_for_media_processor::Data { + fn from( + RawFilePathForMediaProcessor { + id, + materialized_path, + is_dir, + name, + extension, + cas_id, + object_id, + object_pub_id, + }: RawFilePathForMediaProcessor, + ) -> Self { + Self { + id, + materialized_path, + is_dir, + name, + extension, + cas_id, + object: Some(file_path_for_media_processor::object::Data { + id: object_id, + pub_id: object_pub_id, + }), + } + } + } + // FIXME: Had to use format! 
macro because PCR doesn't support IN with Vec for SQLite // We have no data coming from the user, so this is sql injection safe - db._query_raw(raw!( + db._query_raw::(raw!( &format!( - "SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id + "SELECT + file_path.id, + file_path.materialized_path, + file_path.is_dir, + file_path.name, + file_path.extension, + file_path.cas_id, + object.id as 'object_id', + object.pub_id as 'object_pub_id' FROM file_path + INNER JOIN object ON object.id = file_path.object_id WHERE - location_id={{}} - AND cas_id IS NOT NULL - AND LOWER(extension) IN ({}) - AND materialized_path LIKE {{}} + file_path.location_id={{}} + AND file_path.cas_id IS NOT NULL + AND LOWER(file_path.extension) IN ({}) + AND file_path.materialized_path LIKE {{}} ORDER BY materialized_path ASC", // Ordering by materialized_path so we can prioritize processing the first files // in the above part of the directories tree @@ -750,6 +799,7 @@ async fn get_all_children_files_by_extensions( )) .exec() .await + .map(|raw_files| raw_files.into_iter().map(Into::into).collect()) .map_err(Into::into) } diff --git a/core/crates/heavy-lifting/src/media_processor/mod.rs b/core/crates/heavy-lifting/src/media_processor/mod.rs index 800763b02a61..02ab0481ef1d 100644 --- a/core/crates/heavy-lifting/src/media_processor/mod.rs +++ b/core/crates/heavy-lifting/src/media_processor/mod.rs @@ -23,10 +23,14 @@ pub use helpers::{ exif_media_data, ffmpeg_media_data, thumbnailer::{ can_generate_thumbnail_for_document, can_generate_thumbnail_for_image, - can_generate_thumbnail_for_video, generate_single_thumbnail, get_shard_hex, - get_thumbnails_directory, GenerateThumbnailArgs, ThumbKey, ThumbnailKind, WEBP_EXTENSION, + generate_single_thumbnail, get_shard_hex, get_thumbnails_directory, GenerateThumbnailArgs, + ThumbKey, ThumbnailKind, WEBP_EXTENSION, }, }; + +#[cfg(feature = "ffmpeg")] +pub use helpers::thumbnailer::can_generate_thumbnail_for_video; + pub use shallow::shallow; use self::thumbnailer::NewThumbnailReporter; @@ -50,7 +54,11 @@ pub enum Error { impl From for rspc::Error { fn from(e: Error) -> Self { - Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e) + match e { + Error::SubPath(sub_path_err) => sub_path_err.into(), + + _ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e), + } } } diff --git a/core/crates/heavy-lifting/src/media_processor/shallow.rs b/core/crates/heavy-lifting/src/media_processor/shallow.rs index 5e5d1522303d..e9520173996f 100644 --- a/core/crates/heavy-lifting/src/media_processor/shallow.rs +++ b/core/crates/heavy-lifting/src/media_processor/shallow.rs @@ -36,8 +36,8 @@ use super::{ pub async fn shallow( location: location::Data, sub_path: impl AsRef + Send, - dispatcher: BaseTaskDispatcher, - ctx: impl OuterContext, + dispatcher: &BaseTaskDispatcher, + ctx: &impl OuterContext, ) -> Result, Error> { let sub_path = sub_path.as_ref(); @@ -71,16 +71,16 @@ pub async fn shallow( ctx.sync(), &sub_iso_file_path, &location_path, - &dispatcher, + dispatcher, ) .await? .into_iter() - .map(CancelTaskOnDrop) + .map(CancelTaskOnDrop::new) .chain( - dispatch_thumbnailer_tasks(&sub_iso_file_path, false, &location_path, &dispatcher, &ctx) + dispatch_thumbnailer_tasks(&sub_iso_file_path, false, &location_path, dispatcher, ctx) .await? 
.into_iter() - .map(CancelTaskOnDrop), + .map(CancelTaskOnDrop::new), ) .collect::>(); @@ -193,6 +193,7 @@ async fn get_files_by_extensions( WHERE location_id={{}} AND cas_id IS NOT NULL + AND object_id IS NOT NULL AND LOWER(extension) IN ({}) AND materialized_path = {{}}", extensions diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 666c34f5d834..fd0a9d32e1fe 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -1,10 +1,14 @@ use crate::{ + context::NodeContext, invalidate_query, location::{find_location, LocationError}, object::validation::old_validator_job::OldObjectValidatorJobInit, old_job::{JobReport, JobStatus, OldJob, OldJobs}, }; +use sd_core_heavy_lifting::{ + file_identifier::FileIdentifier, media_processor::job::MediaProcessor, +}; use sd_core_prisma_helpers::job_without_data; use sd_prisma::prisma::{job, location, SortOrder}; @@ -12,6 +16,7 @@ use sd_prisma::prisma::{job, location, SortOrder}; use std::{ collections::{hash_map::Entry, BTreeMap, HashMap, VecDeque}, path::PathBuf, + sync::Arc, time::Instant, }; @@ -246,50 +251,50 @@ pub(crate) fn mount() -> AlphaRouter { return Err(LocationError::IdNotFound(id).into()); }; - OldJob::new(OldMediaProcessorJobInit { - location, - sub_path: Some(path), - regenerate_thumbnails: regenerate, - regenerate_labels: false, - }) - .spawn(&node, &library) - .await - .map_err(Into::into) - }, - ) - }) - .procedure("generateLabelsForLocation", { - #[derive(Type, Deserialize)] - pub struct GenerateLabelsForLocationArgs { - pub id: location::id::Type, - pub path: PathBuf, - #[serde(default)] - pub regenerate: bool, - } - - R.with2(library()).mutation( - |(node, library), - GenerateLabelsForLocationArgs { - id, - path, - regenerate, - }: GenerateLabelsForLocationArgs| async move { - let Some(location) = find_location(&library, id).exec().await? else { - return Err(LocationError::IdNotFound(id).into()); - }; - - OldJob::new(OldMediaProcessorJobInit { - location, - sub_path: Some(path), - regenerate_thumbnails: false, - regenerate_labels: regenerate, - }) - .spawn(&node, &library) - .await - .map_err(Into::into) + node.job_system + .dispatch( + MediaProcessor::new(location, Some(path), regenerate)?, + id, + NodeContext { + node: Arc::clone(&node), + library, + }, + ) + .await + .map_err(Into::into) }, ) }) + // .procedure("generateLabelsForLocation", { + // #[derive(Type, Deserialize)] + // pub struct GenerateLabelsForLocationArgs { + // pub id: location::id::Type, + // pub path: PathBuf, + // #[serde(default)] + // pub regenerate: bool, + // } + // R.with2(library()).mutation( + // |(node, library), + // GenerateLabelsForLocationArgs { + // id, + // path, + // regenerate, + // }: GenerateLabelsForLocationArgs| async move { + // let Some(location) = find_location(&library, id).exec().await? else { + // return Err(LocationError::IdNotFound(id).into()); + // }; + // OldJob::new(OldMediaProcessorJobInit { + // location, + // sub_path: Some(path), + // regenerate_thumbnails: false, + // regenerate_labels: regenerate, + // }) + // .spawn(&node, &library) + // .await + // .map_err(Into::into) + // }, + // ) + // }) .procedure("objectValidator", { #[derive(Type, Deserialize)] pub struct ObjectValidatorArgs { @@ -320,18 +325,30 @@ pub(crate) fn mount() -> AlphaRouter { } R.with2(library()).mutation( - |(node, library), args: IdentifyUniqueFilesArgs| async move { - let Some(location) = find_location(&library, args.id).exec().await? 
else { - return Err(LocationError::IdNotFound(args.id).into()); + |(node, library), IdentifyUniqueFilesArgs { id, path }: IdentifyUniqueFilesArgs| async move { + let Some(location) = find_location(&library, id).exec().await? else { + return Err(LocationError::IdNotFound(id).into()); }; - OldJob::new(OldFileIdentifierJobInit { - location, - sub_path: Some(args.path), - }) - .spawn(&node, &library) - .await - .map_err(Into::into) + // OldJob::new(OldFileIdentifierJobInit { + // location, + // sub_path: Some(args.path), + // }) + // .spawn(&node, &library) + // .await + // .map_err(Into::into) + + node.job_system + .dispatch( + FileIdentifier::new(location, Some(path))?, + id, + NodeContext { + node: Arc::clone(&node), + library, + }, + ) + .await + .map_err(Into::into) }, ) }) diff --git a/core/src/api/libraries.rs b/core/src/api/libraries.rs index 4d63b9b690cb..212ec75c54f1 100644 --- a/core/src/api/libraries.rs +++ b/core/src/api/libraries.rs @@ -8,6 +8,7 @@ use crate::{ use futures::StreamExt; use prisma_client_rust::raw; +use sd_core_heavy_lifting::JobId; use sd_file_ext::kind::ObjectKind; use sd_p2p::RemoteIdentity; use sd_prisma::prisma::{indexer_rule, object, statistics}; @@ -181,13 +182,13 @@ pub(crate) fn mount() -> AlphaRouter { }: DefaultLocations, node: Arc, library: Arc, - ) -> Result<(), rspc::Error> { + ) -> Result, rspc::Error> { // If all of them are false, we skip if [!desktop, !documents, !downloads, !pictures, !music, !videos] .into_iter() .all(identity) { - return Ok(()); + return Ok(None); } let Some(default_locations_paths) = UserDirs::new() else { @@ -242,7 +243,7 @@ pub(crate) fn mount() -> AlphaRouter { .await .map_err(rspc::Error::from)? else { - return Ok(()); + return Ok(None); }; let scan_state = ScanState::try_from(location.scan_state)?; @@ -283,7 +284,7 @@ pub(crate) fn mount() -> AlphaRouter { debug!("Created default locations"); - Ok(()) + Ok(None) } R.mutation( diff --git a/core/src/api/locations.rs b/core/src/api/locations.rs index 5d9a9832441e..2a67fc6549bd 100644 --- a/core/src/api/locations.rs +++ b/core/src/api/locations.rs @@ -5,12 +5,11 @@ use crate::{ relink_location, scan_location, scan_location_sub_path, LocationCreateArgs, LocationError, LocationUpdateArgs, ScanState, }, - old_job::StatefulJob, p2p::PeerMetadata, util::AbortOnDrop, }; -use sd_core_heavy_lifting::media_processor::ThumbKey; +use sd_core_heavy_lifting::{media_processor::ThumbKey, JobName}; use sd_core_indexer_rules::IndexerRuleCreateArgs; use sd_core_prisma_helpers::{ file_path_for_frontend, label_with_objects, location_with_indexer_rules, object_with_file_paths, @@ -405,13 +404,15 @@ pub(crate) fn mount() -> AlphaRouter { sub_path, }: LightScanArgs| async move { if node - .old_jobs - .has_job_running(|job_identity| { - job_identity.target_location == location_id - && (job_identity.name == ::NAME - || job_identity.name - == ::NAME) - }) + .job_system + .check_running_jobs( + vec![ + JobName::Indexer, + JobName::FileIdentifier, + JobName::MediaProcessor, + ], + location_id, + ) .await { return Err(rspc::Error::new( diff --git a/core/src/api/nodes.rs b/core/src/api/nodes.rs index e477ec7f830e..00b42781c4ff 100644 --- a/core/src/api/nodes.rs +++ b/core/src/api/nodes.rs @@ -176,12 +176,9 @@ pub(crate) fn mount() -> AlphaRouter { pub background_processing_percentage: u8, // 0-100 } R.mutation( - |node, - UpdateThumbnailerPreferences { - background_processing_percentage, - }: UpdateThumbnailerPreferences| async move { + |node, UpdateThumbnailerPreferences { .. 
}: UpdateThumbnailerPreferences| async move { node.config - .update_preferences(|preferences| { + .update_preferences(|_| { // TODO(fogodev): remove this crap // preferences // .thumbnailer diff --git a/core/src/api/search/mod.rs b/core/src/api/search/mod.rs index d9badd0e5164..4781c450c4e3 100644 --- a/core/src/api/search/mod.rs +++ b/core/src/api/search/mod.rs @@ -349,7 +349,7 @@ pub fn mount() -> AlphaRouter { items.push(ExplorerItem::Object { thumbnail: cas_id // .filter(|_| thumbnail_exists_locally) - .map(|cas_id| get_indexed_thumb_key(cas_id, library.id)), + .map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)), item: object, has_created_thumbnail, }); diff --git a/core/src/context.rs b/core/src/context.rs index 03c05e52c056..b394f680af9d 100644 --- a/core/src/context.rs +++ b/core/src/context.rs @@ -22,25 +22,20 @@ pub struct NodeContext { pub library: Arc, } -mod sealed { - use crate::{library::Library, Node}; - - use std::sync::Arc; +pub trait NodeContextExt: sealed::Sealed { + fn library(&self) -> &Arc; +} - pub(super) trait Sealed { - fn library(&self) -> &Arc; - fn node(&self) -> &Arc; - } +mod sealed { + pub trait Sealed {} } -impl sealed::Sealed for NodeContext { +impl sealed::Sealed for NodeContext {} + +impl NodeContextExt for NodeContext { fn library(&self) -> &Arc { &self.library } - - fn node(&self) -> &Arc { - &self.node - } } impl OuterContext for NodeContext { @@ -83,13 +78,13 @@ impl OuterContext for NodeContext { } #[derive(Clone)] -pub struct JobContext { +pub struct JobContext { outer_ctx: OuterCtx, report: Arc>, start_time: DateTime, } -impl OuterContext for JobContext { +impl OuterContext for JobContext { fn id(&self) -> Uuid { self.outer_ctx.id() } @@ -119,7 +114,7 @@ impl OuterContext for JobContext sd_core_heavy_lifting::JobContext +impl sd_core_heavy_lifting::JobContext for JobContext { fn new(report: Report, outer_ctx: OuterCtx) -> Self { diff --git a/core/src/lib.rs b/core/src/lib.rs index cb55c4b96f16..f5d6c532f481 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -7,6 +7,7 @@ use crate::{ // object::media::old_thumbnail::old_actor::OldThumbnailer, }; +use futures_concurrency::future::Join; use sd_core_heavy_lifting::JobSystem; #[cfg(feature = "ai")] @@ -22,7 +23,6 @@ use notifications::Notifications; use reqwest::{RequestBuilder, Response}; use std::{ - collections::HashMap, fmt, path::{Path, PathBuf}, sync::{atomic::AtomicBool, Arc}, @@ -289,9 +289,18 @@ impl Node { pub async fn shutdown(&self) { info!("Spacedrive shutting down..."); - // self.thumbnailer.shutdown().await; - self.old_jobs.shutdown().await; - self.p2p.shutdown().await; + + // Let's shutdown the task system first, as the job system will receive tasks to save + self.task_system.shutdown().await; + + ( + self.old_jobs.shutdown(), + self.p2p.shutdown(), + self.job_system.shutdown(), + ) + .join() + .await; + #[cfg(feature = "ai")] if let Some(image_labeller) = &self.old_image_labeller { image_labeller.shutdown().await; diff --git a/core/src/location/manager/mod.rs b/core/src/location/manager/mod.rs index 93e1e69eea34..787ef6519c56 100644 --- a/core/src/location/manager/mod.rs +++ b/core/src/location/manager/mod.rs @@ -6,6 +6,7 @@ use crate::{ use sd_core_file_path_helper::FilePathError; +use sd_core_heavy_lifting::{indexer, JobSystemError}; use sd_prisma::prisma::location; use sd_utils::{db::MissingFieldError, error::FileIOError}; @@ -103,6 +104,10 @@ pub enum LocationManagerError { #[error("missing-field")] MissingField(#[from] MissingFieldError), + #[error(transparent)] + 
Indexer(#[from] indexer::Error), + #[error(transparent)] + JobSystem(#[from] JobSystemError), #[error(transparent)] FileIO(#[from] FileIOError), } diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 357541bdb058..370ea8f287d8 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -1113,8 +1113,22 @@ pub(super) async fn recalculate_directories_size( path.display(), location_path.display(), ); - reverse_update_directories_sizes(path, location_id, location_path, library) - .await?; + let mut non_critical_errors = vec![]; + reverse_update_directories_sizes( + path, + location_id, + location_path, + &library.db, + &library.sync, + &mut non_critical_errors, + ) + .await?; + + error!( + "Reverse calculating directory sizes finished with {} non-critical errors: {non_critical_errors:#?}", + non_critical_errors.len() + ); + should_invalidate = true; } else { should_update_location_size = true; diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index be05ec7b6987..4154dd477e92 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -1,13 +1,21 @@ use crate::{ + context::NodeContext, invalidate_query, library::Library, - old_job::{JobBuilder, JobError, JobManagerError}, + // old_job::{JobBuilder, JobError, JobManagerError}, Node, }; use sd_core_file_path_helper::{ filter_existing_file_path_params, IsolatedFilePathData, IsolatedFilePathDataParts, }; +use sd_core_heavy_lifting::{ + file_identifier::{self, FileIdentifier}, + indexer::{self, job::Indexer}, + job_system::report::ReportInputMetadata, + media_processor::{self, job::MediaProcessor}, + JobEnqueuer, JobId, JobSystemError, +}; use sd_core_prisma_helpers::location_with_indexer_rules; use sd_prisma::{ @@ -32,14 +40,13 @@ use futures::future::TryFutureExt; use normpath::PathExt; use prisma_client_rust::{operator::and, or, QueryError}; use serde::{Deserialize, Serialize}; -use serde_json::json; use specta::Type; use tokio::{fs, io, time::Instant}; use tracing::{debug, error, info, warn}; use uuid::Uuid; mod error; -pub mod indexer; +// pub mod indexer; mod manager; pub mod metadata; pub mod non_indexed; @@ -462,70 +469,79 @@ pub async fn scan_location( library: &Arc, location: location_with_indexer_rules::Data, location_scan_state: ScanState, -) -> Result<(), JobManagerError> { +) -> Result, JobSystemError> { // TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup. 
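// Caller-side sketch of the new return type (illustrative; assumes a caller that only
// wants to log the enqueued job):
//
//     if let Some(job_id) = scan_location(node, library, location, scan_state).await? {
//         debug!("Location scan enqueued as job {job_id}");
//     }
//
// Ok(None) means the scan was skipped, e.g. when the location belongs to a different
// instance, mirroring the early return just below.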
if location.instance_id != Some(library.config().await.instance_id) { - return Ok(()); + return Ok(None); } + let location_id = location.id; + let ctx = NodeContext { + node: Arc::clone(node), + library: Arc::clone(library), + }; + let location_base_data = location::Data::from(&location); debug!("Scanning location with state: {location_scan_state:?}"); - match location_scan_state { + let job_id = match location_scan_state { ScanState::Pending | ScanState::Completed => { - JobBuilder::new(OldIndexerJobInit { - location, - sub_path: None, - }) - .with_action("scan_location") - .with_metadata(json!({"location": location_base_data.clone()})) - .build() - .queue_next(OldFileIdentifierJobInit { - location: location_base_data.clone(), - sub_path: None, - }) - .queue_next(OldMediaProcessorJobInit { - location: location_base_data, - sub_path: None, - regenerate_thumbnails: false, - regenerate_labels: false, - }) - .spawn(node, library) - .await + node.job_system + .dispatch( + JobEnqueuer::new( + Indexer::new(location, None).map_err(sd_core_heavy_lifting::Error::from)?, + ) + .with_action("scan_location") + .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) + .enqueue_next( + FileIdentifier::new(location_base_data.clone(), None) + .map_err(sd_core_heavy_lifting::Error::from)?, + ) + .enqueue_next( + MediaProcessor::new(location_base_data, None, false) + .map_err(sd_core_heavy_lifting::Error::from)?, + ), + location_id, + ctx.clone(), + ) + .await? } ScanState::Indexed => { - JobBuilder::new(OldFileIdentifierJobInit { - location: location_base_data.clone(), - sub_path: None, - }) - .with_action("scan_location_already_indexed") - .with_metadata(json!({"location": location_base_data.clone()})) - .build() - .queue_next(OldMediaProcessorJobInit { - location: location_base_data, - sub_path: None, - regenerate_thumbnails: false, - regenerate_labels: false, - }) - .spawn(node, library) - .await + node.job_system + .dispatch( + JobEnqueuer::new( + FileIdentifier::new(location_base_data.clone(), None) + .map_err(sd_core_heavy_lifting::Error::from)?, + ) + .with_action("scan_location_already_indexed") + .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) + .enqueue_next( + MediaProcessor::new(location_base_data, None, false) + .map_err(sd_core_heavy_lifting::Error::from)?, + ), + location_id, + ctx.clone(), + ) + .await? } ScanState::FilesIdentified => { - JobBuilder::new(OldMediaProcessorJobInit { - location: location_base_data.clone(), - sub_path: None, - regenerate_thumbnails: false, - regenerate_labels: false, - }) - .with_action("scan_location_files_already_identified") - .with_metadata(json!({"location": location_base_data})) - .build() - .spawn(node, library) - .await + node.job_system + .dispatch( + JobEnqueuer::new( + MediaProcessor::new(location_base_data.clone(), None, false) + .map_err(sd_core_heavy_lifting::Error::from)?, + ) + .with_action("scan_location_files_already_identified") + .with_metadata(ReportInputMetadata::Location(location_base_data)), + location_id, + ctx.clone(), + ) + .await? 
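// For reference, the three arms above reduce to these enqueue chains (a summary of
// the dispatch calls, not additional behaviour):
//   Pending | Completed -> Indexer -> FileIdentifier -> MediaProcessor
//   Indexed             -> FileIdentifier -> MediaProcessor
//   FilesIdentified     -> MediaProcessor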
} - } - .map_err(Into::into) + }; + + Ok(Some(job_id)) } pub async fn scan_location_sub_path( @@ -533,39 +549,44 @@ pub async fn scan_location_sub_path( library: &Arc, location: location_with_indexer_rules::Data, sub_path: impl AsRef, -) -> Result<(), JobManagerError> { +) -> Result, JobSystemError> { let sub_path = sub_path.as_ref().to_path_buf(); // TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup. if location.instance_id != Some(library.config().await.instance_id) { - return Ok(()); + return Ok(None); } + let location_id = location.id; + let ctx = NodeContext { + node: Arc::clone(node), + library: Arc::clone(library), + }; + let location_base_data = location::Data::from(&location); - JobBuilder::new(OldIndexerJobInit { - location, - sub_path: Some(sub_path.clone()), - }) - .with_action("scan_location_sub_path") - .with_metadata(json!({ - "location": location_base_data.clone(), - "sub_path": sub_path.clone(), - })) - .build() - .queue_next(OldFileIdentifierJobInit { - location: location_base_data.clone(), - sub_path: Some(sub_path.clone()), - }) - .queue_next(OldMediaProcessorJobInit { - location: location_base_data, - sub_path: Some(sub_path), - regenerate_thumbnails: false, - regenerate_labels: false, - }) - .spawn(node, library) - .await - .map_err(Into::into) + node.job_system + .dispatch( + JobEnqueuer::new( + Indexer::new(location, Some(sub_path.clone())) + .map_err(sd_core_heavy_lifting::Error::from)?, + ) + .with_action("scan_location") + .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) + .with_metadata(ReportInputMetadata::SubPath(sub_path.clone())) + .enqueue_next( + FileIdentifier::new(location_base_data.clone(), Some(sub_path.clone())) + .map_err(sd_core_heavy_lifting::Error::from)?, + ) + .enqueue_next( + MediaProcessor::new(location_base_data, Some(sub_path), false) + .map_err(sd_core_heavy_lifting::Error::from)?, + ), + location_id, + ctx.clone(), + ) + .await + .map(Some) } pub async fn light_scan_location( @@ -573,7 +594,7 @@ pub async fn light_scan_location( library: Arc, location: location_with_indexer_rules::Data, sub_path: impl AsRef, -) -> Result<(), JobError> { +) -> Result<(), sd_core_heavy_lifting::Error> { let sub_path = sub_path.as_ref().to_path_buf(); // TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup. @@ -583,17 +604,34 @@ pub async fn light_scan_location( let location_base_data = location::Data::from(&location); - indexer::old_shallow(&location, &sub_path, &node, &library).await?; - old_file_identifier::old_shallow(&location_base_data, &sub_path, &library).await?; - old_media_processor::old_shallow( - &location_base_data, - &sub_path, - &library, - #[cfg(feature = "ai")] - false, - &node, - ) - .await?; + // indexer::old_shallow(&location, &sub_path, &node, &library).await?; + // old_file_identifier::old_shallow(&location_base_data, &sub_path, &library).await?; + // old_media_processor::old_shallow( + // &location_base_data, + // &sub_path, + // &library, + // #[cfg(feature = "ai")] + // false, + // &node, + // ) + // .await?; + + let dispatcher = node.task_system.get_dispatcher(); + let ctx = NodeContext { node, library }; + + for e in indexer::shallow(location, &sub_path, &dispatcher, &ctx).await? { + error!("Shallow indexer errors: {e:#?}"); + } + + for e in + file_identifier::shallow(location_base_data.clone(), &sub_path, &dispatcher, &ctx).await? 
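// Sketch note: each shallow helper yields the non-critical errors it collected, while
// anything fatal still aborts via the `?` on the call itself, so these loops only log
// what went wrong without failing the light scan.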
+ { + error!("Shallow file identifier errors: {e:#?}"); + } + + for e in media_processor::shallow(location_base_data, &sub_path, &dispatcher, &ctx).await? { + error!("Shallow media processor errors: {e:#?}"); + } Ok(()) } diff --git a/core/src/object/media/mod.rs b/core/src/object/media/mod.rs index b62317d694ab..36ade439aa36 100644 --- a/core/src/object/media/mod.rs +++ b/core/src/object/media/mod.rs @@ -19,11 +19,11 @@ // pub mod old_media_processor; // pub mod old_thumbnail; -use std::path::PathBuf; +use crate::{library::LibraryId, Node}; -use sd_core_heavy_lifting::media_processor::{ThumbKey, ThumbnailKind}; +use sd_core_heavy_lifting::media_processor::ThumbnailKind; -use crate::{library::LibraryId, Node}; +use std::path::PathBuf; /// This does not check if a thumbnail exists, it just returns the path that it would exist at pub fn get_indexed_thumbnail_path(node: &Node, cas_id: &str, library_id: LibraryId) -> PathBuf { diff --git a/core/src/old_job/error.rs b/core/src/old_job/error.rs index 2bc4ad87a5c8..e6fcaaf26bda 100644 --- a/core/src/old_job/error.rs +++ b/core/src/old_job/error.rs @@ -1,5 +1,5 @@ use crate::{ - location::{indexer::IndexerError, LocationError}, + location::{/*indexer::IndexerError,*/ LocationError}, object::{ fs::error::FileSystemJobsError, /*media::old_media_processor::MediaProcessorError,*/ /*old_file_identifier::FileIdentifierJobError,*/ validation::ValidatorError, @@ -56,8 +56,8 @@ pub enum JobError { Critical(&'static str), // Specific job errors - #[error(transparent)] - Indexer(#[from] IndexerError), + // #[error(transparent)] + // Indexer(#[from] IndexerError), // #[error(transparent)] // MediaProcessor(#[from] MediaProcessorError), // #[error(transparent)] diff --git a/core/src/util/debug_initializer.rs b/core/src/util/debug_initializer.rs index 8d796cc3d9ce..0427049f1705 100644 --- a/core/src/util/debug_initializer.rs +++ b/core/src/util/debug_initializer.rs @@ -11,6 +11,7 @@ use crate::{ Node, }; +use sd_core_heavy_lifting::JobSystemError; use sd_prisma::prisma::location; use sd_utils::error::FileIOError; @@ -75,6 +76,8 @@ pub enum InitConfigError { #[error("failed to get current directory from environment: {0}")] CurrentDir(io::Error), + #[error(transparent)] + JobSystem(#[from] JobSystemError), #[error(transparent)] FileIO(#[from] FileIOError), } diff --git a/crates/task-system/src/system.rs b/crates/task-system/src/system.rs index 41a2c802d2bd..7df19aeca5ba 100644 --- a/crates/task-system/src/system.rs +++ b/crates/task-system/src/system.rs @@ -39,12 +39,16 @@ pub struct System { impl System { /// Created a new task system with a number of workers equal to the available parallelism in the user's machine. 
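// Worked examples for the worker-count formula introduced just below
// (half of the available parallelism, floored, but never less than one):
//   available_parallelism = 8         -> max(8 / 2, 1) = 4 workers
//   available_parallelism = 3         -> max(3 / 2, 1) = 1 worker
//   available_parallelism unavailable -> falls back to 1 -> max(1 / 2, 1) = 1 worker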
pub fn new() -> Self { - let workers_count = std::thread::available_parallelism().map_or_else( - |e| { - error!("Failed to get available parallelism in the job system: {e:#?}"); - 1 - }, - NonZeroUsize::get, + // TODO: Using only the half of available cores, make this configurable on runtime in the future + let workers_count = usize::max( + std::thread::available_parallelism().map_or_else( + |e| { + error!("Failed to get available parallelism in the job system: {e:#?}"); + 1 + }, + NonZeroUsize::get, + ) / 2, + 1, ); let (msgs_tx, msgs_rx) = chan::bounded(8); @@ -91,7 +95,7 @@ impl System { } }); - trace!("Task system online!"); + info!("Task system online with {workers_count} workers!"); Self { workers: Arc::clone(&workers), diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 7804a01ca2db..486818c7dedd 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -14,7 +14,7 @@ use async_channel as chan; use async_trait::async_trait; use chan::{Recv, RecvError}; use downcast_rs::{impl_downcast, Downcast}; -use tokio::{runtime::Handle, sync::oneshot}; +use tokio::{spawn, sync::oneshot}; use tracing::{trace, warn}; use uuid::Uuid; @@ -508,20 +508,40 @@ impl TaskHandle { } /// A helper struct when you just want to cancel a task if its `TaskHandle` gets dropped. -pub struct CancelTaskOnDrop(pub TaskHandle); +pub struct CancelTaskOnDrop(Option>); + +impl CancelTaskOnDrop { + /// Create a new `CancelTaskOnDrop` object with the given `TaskHandle`. + #[must_use] + pub const fn new(handle: TaskHandle) -> Self { + Self(Some(handle)) + } +} impl Future for CancelTaskOnDrop { type Output = Result, SystemError>; fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { - Pin::new(&mut self.0).poll(cx) + if let Some(handle) = self.0.as_mut() { + match Pin::new(handle).poll(cx) { + Poll::Ready(res) => { + self.0 = None; + Poll::Ready(res) + } + Poll::Pending => Poll::Pending, + } + } else { + Poll::Ready(Ok(TaskStatus::Canceled)) + } } } impl Drop for CancelTaskOnDrop { fn drop(&mut self) { // FIXME: We should use async drop when it becomes stable - Handle::current().block_on(self.0.cancel()); + if let Some(handle) = self.0.take() { + spawn(async move { handle.cancel().await }); + } } } diff --git a/interface/app/$libraryId/Explorer/FilePath/Thumb.tsx b/interface/app/$libraryId/Explorer/FilePath/Thumb.tsx index f533fe7ea6e9..afd71e3476f6 100644 --- a/interface/app/$libraryId/Explorer/FilePath/Thumb.tsx +++ b/interface/app/$libraryId/Explorer/FilePath/Thumb.tsx @@ -69,7 +69,7 @@ export const FileThumb = forwardRef((props, ref) = if ( loadState.thumbnail !== 'error' && itemData.hasLocalThumbnail && - itemData.thumbnailKey.length > 0 + itemData.thumbnailKey ) return { variant: 'thumbnail' }; @@ -87,7 +87,7 @@ export const FileThumb = forwardRef((props, ref) = break; case 'thumbnail': - if (itemData.thumbnailKey.length > 0) + if (itemData.thumbnailKey) return platform.getThumbnailUrlByThumbKey(itemData.thumbnailKey); break; diff --git a/interface/app/$libraryId/Explorer/store.ts b/interface/app/$libraryId/Explorer/store.ts index 810dc9f345a3..5ba8bfb96374 100644 --- a/interface/app/$libraryId/Explorer/store.ts +++ b/interface/app/$libraryId/Explorer/store.ts @@ -1,4 +1,5 @@ import { + ThumbKey, resetStore, type DoubleClickAction, type ExplorerItem, @@ -113,14 +114,14 @@ const state = { quickRescanLastRun: Date.now() - 200 }; -export function flattenThumbnailKey(thumbKey: string[]) { - return thumbKey.join('/'); +export function 
flattenThumbnailKey(thumbKey: ThumbKey) { + return `${thumbKey.base_directory_str}/${thumbKey.shard_hex}/${thumbKey.cas_id}`; } export const explorerStore = proxy({ ...state, reset: (_state?: typeof state) => resetStore(explorerStore, _state || state), - addNewThumbnail: (thumbKey: string[]) => { + addNewThumbnail: (thumbKey: ThumbKey) => { explorerStore.newThumbnails.add(flattenThumbnailKey(thumbKey)); }, resetCache: () => { diff --git a/interface/app/$libraryId/settings/client/general.tsx b/interface/app/$libraryId/settings/client/general.tsx index da16765d97bd..90d04cd4fbd7 100644 --- a/interface/app/$libraryId/settings/client/general.tsx +++ b/interface/app/$libraryId/settings/client/general.tsx @@ -68,9 +68,9 @@ export const Component = () => { p2p_ipv6_enabled: node.data?.p2p.ipv6 || true, p2p_discovery: node.data?.p2p.discovery || 'Everyone', p2p_remote_access: node.data?.p2p.remote_access || false, - image_labeler_version: node.data?.image_labeler_version ?? undefined, - background_processing_percentage: - node.data?.preferences.thumbnailer.background_processing_percentage || 50 + image_labeler_version: node.data?.image_labeler_version ?? undefined + // background_processing_percentage: + // node.data?.preferences.thumbnailer?.background_processing_percentage || 50 } }); const p2p_port = form.watch('p2p_port'); @@ -232,10 +232,7 @@ export const Component = () => { platform.constructRemoteRspcPath( params.node, - `thumbnail/${thumbKey.map((i) => encodeURIComponent(i)).join('/')}.webp` + `thumbnail/${encodeURIComponent( + thumbKey.base_directory_str + )}/${encodeURIComponent(thumbKey.shard_hex)}/${encodeURIComponent( + thumbKey.cas_id + )}.webp` ), getFileUrl: (libraryId, locationLocalId, filePathId) => platform.constructRemoteRspcPath( diff --git a/interface/util/Platform.tsx b/interface/util/Platform.tsx index dc73def1c364..e9f1b0a8e740 100644 --- a/interface/util/Platform.tsx +++ b/interface/util/Platform.tsx @@ -1,5 +1,5 @@ import { createContext, useContext, type PropsWithChildren } from 'react'; -import { auth } from '@sd/client'; +import { ThumbKey, auth } from '@sd/client'; export type OperatingSystem = 'browser' | 'linux' | 'macOS' | 'windows' | 'unknown'; @@ -17,7 +17,7 @@ export type OpenWithApplication = { url: string; name: string }; // This could be Tauri or web. export type Platform = { platform: 'web' | 'tauri'; // This represents the specific platform implementation - getThumbnailUrlByThumbKey: (thumbKey: string[]) => string; + getThumbnailUrlByThumbKey: (thumbKey: ThumbKey) => string; getFileUrl: (libraryId: string, locationLocalId: number, filePathId: number) => string; getFileUrlByPath: (path: string) => string; getRemoteRspcEndpoint: (remote_identity: string) => { diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 5c9c249a7200..f1364efbc712 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -2,148 +2,147 @@ // This file was generated by [rspc](https://github.com/oscartbeaumont/rspc). Do not edit this file manually. 
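// Summary of the regenerated bindings below: thumbnail keys are now structured
// ThumbKey objects instead of string[] shards, the mutations moved to the new job
// system (generateThumbsForLocation, identifyUniqueFiles, fullRescan, subPathRescan)
// now surface the dispatched job id, and generateLabelsForLocation is gone for now.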
export type Procedures = { - queries: - { key: "auth.me", input: never, result: { id: string; email: string } } | - { key: "backups.getAll", input: never, result: GetAll } | - { key: "buildInfo", input: never, result: BuildInfo } | - { key: "cloud.getApiOrigin", input: never, result: string } | - { key: "cloud.library.get", input: LibraryArgs, result: CloudLibrary | null } | - { key: "cloud.library.list", input: never, result: CloudLibrary[] } | - { key: "cloud.locations.list", input: never, result: CloudLocation[] } | - { key: "ephemeralFiles.getMediaData", input: string, result: MediaData | null } | - { key: "files.get", input: LibraryArgs, result: ObjectWithFilePaths2 | null } | - { key: "files.getConvertibleImageExtensions", input: never, result: string[] } | - { key: "files.getMediaData", input: LibraryArgs, result: MediaData } | - { key: "files.getPath", input: LibraryArgs, result: string | null } | - { key: "invalidation.test-invalidate", input: never, result: number } | - { key: "jobs.isActive", input: LibraryArgs, result: boolean } | - { key: "jobs.reports", input: LibraryArgs, result: JobGroup[] } | - { key: "labels.count", input: LibraryArgs, result: number } | - { key: "labels.get", input: LibraryArgs, result: Label | null } | - { key: "labels.getForObject", input: LibraryArgs, result: Label[] } | - { key: "labels.getWithObjects", input: LibraryArgs, result: { [key in number]: { date_created: string; object: { id: number } }[] } } | - { key: "labels.list", input: LibraryArgs, result: Label[] } | - { key: "labels.listWithThumbnails", input: LibraryArgs, result: ExplorerItem[] } | - { key: "library.kindStatistics", input: LibraryArgs, result: KindStatistics } | - { key: "library.list", input: never, result: LibraryConfigWrapped[] } | - { key: "library.statistics", input: LibraryArgs, result: StatisticsResponse } | - { key: "locations.get", input: LibraryArgs, result: Location | null } | - { key: "locations.getWithRules", input: LibraryArgs, result: LocationWithIndexerRule | null } | - { key: "locations.indexer_rules.get", input: LibraryArgs, result: IndexerRule } | - { key: "locations.indexer_rules.list", input: LibraryArgs, result: IndexerRule[] } | - { key: "locations.indexer_rules.listForLocation", input: LibraryArgs, result: IndexerRule[] } | - { key: "locations.list", input: LibraryArgs, result: Location[] } | - { key: "locations.systemLocations", input: never, result: SystemLocations } | - { key: "models.image_detection.list", input: never, result: string[] } | - { key: "nodeState", input: never, result: NodeState } | - { key: "nodes.listLocations", input: LibraryArgs, result: ExplorerItem[] } | - { key: "notifications.dismiss", input: NotificationId, result: null } | - { key: "notifications.dismissAll", input: never, result: null } | - { key: "notifications.get", input: never, result: Notification[] } | - { key: "p2p.listeners", input: never, result: Listeners } | - { key: "p2p.state", input: never, result: JsonValue } | - { key: "preferences.get", input: LibraryArgs, result: LibraryPreferences } | - { key: "search.objects", input: LibraryArgs, result: SearchData } | - { key: "search.objectsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | - { key: "search.paths", input: LibraryArgs, result: SearchData } | - { key: "search.pathsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | - { key: "search.saved.get", input: LibraryArgs, result: SavedSearch | null } | - { key: "search.saved.list", input: LibraryArgs, result: 
SavedSearch[] } | - { key: "sync.enabled", input: LibraryArgs, result: boolean } | - { key: "sync.messages", input: LibraryArgs, result: CRDTOperation[] } | - { key: "tags.get", input: LibraryArgs, result: Tag | null } | - { key: "tags.getForObject", input: LibraryArgs, result: Tag[] } | - { key: "tags.getWithObjects", input: LibraryArgs, result: { [key in number]: ({ object: { id: number }; date_created: string | null })[] } } | - { key: "tags.list", input: LibraryArgs, result: Tag[] } | + queries: + { key: "auth.me", input: never, result: { id: string; email: string } } | + { key: "backups.getAll", input: never, result: GetAll } | + { key: "buildInfo", input: never, result: BuildInfo } | + { key: "cloud.getApiOrigin", input: never, result: string } | + { key: "cloud.library.get", input: LibraryArgs, result: CloudLibrary | null } | + { key: "cloud.library.list", input: never, result: CloudLibrary[] } | + { key: "cloud.locations.list", input: never, result: CloudLocation[] } | + { key: "ephemeralFiles.getMediaData", input: string, result: MediaData | null } | + { key: "files.get", input: LibraryArgs, result: ObjectWithFilePaths2 | null } | + { key: "files.getConvertibleImageExtensions", input: never, result: string[] } | + { key: "files.getMediaData", input: LibraryArgs, result: MediaData } | + { key: "files.getPath", input: LibraryArgs, result: string | null } | + { key: "invalidation.test-invalidate", input: never, result: number } | + { key: "jobs.isActive", input: LibraryArgs, result: boolean } | + { key: "jobs.reports", input: LibraryArgs, result: JobGroup[] } | + { key: "labels.count", input: LibraryArgs, result: number } | + { key: "labels.get", input: LibraryArgs, result: Label | null } | + { key: "labels.getForObject", input: LibraryArgs, result: Label[] } | + { key: "labels.getWithObjects", input: LibraryArgs, result: { [key in number]: { date_created: string; object: { id: number } }[] } } | + { key: "labels.list", input: LibraryArgs, result: Label[] } | + { key: "labels.listWithThumbnails", input: LibraryArgs, result: ExplorerItem[] } | + { key: "library.kindStatistics", input: LibraryArgs, result: KindStatistics } | + { key: "library.list", input: never, result: LibraryConfigWrapped[] } | + { key: "library.statistics", input: LibraryArgs, result: StatisticsResponse } | + { key: "locations.get", input: LibraryArgs, result: Location | null } | + { key: "locations.getWithRules", input: LibraryArgs, result: LocationWithIndexerRule | null } | + { key: "locations.indexer_rules.get", input: LibraryArgs, result: IndexerRule } | + { key: "locations.indexer_rules.list", input: LibraryArgs, result: IndexerRule[] } | + { key: "locations.indexer_rules.listForLocation", input: LibraryArgs, result: IndexerRule[] } | + { key: "locations.list", input: LibraryArgs, result: Location[] } | + { key: "locations.systemLocations", input: never, result: SystemLocations } | + { key: "models.image_detection.list", input: never, result: string[] } | + { key: "nodeState", input: never, result: NodeState } | + { key: "nodes.listLocations", input: LibraryArgs, result: ExplorerItem[] } | + { key: "notifications.dismiss", input: NotificationId, result: null } | + { key: "notifications.dismissAll", input: never, result: null } | + { key: "notifications.get", input: never, result: Notification[] } | + { key: "p2p.listeners", input: never, result: Listeners } | + { key: "p2p.state", input: never, result: JsonValue } | + { key: "preferences.get", input: LibraryArgs, result: LibraryPreferences } | + { key: 
"search.objects", input: LibraryArgs, result: SearchData } | + { key: "search.objectsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | + { key: "search.paths", input: LibraryArgs, result: SearchData } | + { key: "search.pathsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | + { key: "search.saved.get", input: LibraryArgs, result: SavedSearch | null } | + { key: "search.saved.list", input: LibraryArgs, result: SavedSearch[] } | + { key: "sync.enabled", input: LibraryArgs, result: boolean } | + { key: "sync.messages", input: LibraryArgs, result: CRDTOperation[] } | + { key: "tags.get", input: LibraryArgs, result: Tag | null } | + { key: "tags.getForObject", input: LibraryArgs, result: Tag[] } | + { key: "tags.getWithObjects", input: LibraryArgs, result: { [key in number]: ({ object: { id: number }; date_created: string | null })[] } } | + { key: "tags.list", input: LibraryArgs, result: Tag[] } | { key: "volumes.list", input: never, result: Volume[] }, - mutations: - { key: "api.sendFeedback", input: Feedback, result: null } | - { key: "auth.logout", input: never, result: null } | - { key: "backups.backup", input: LibraryArgs, result: string } | - { key: "backups.delete", input: string, result: null } | - { key: "backups.restore", input: string, result: null } | - { key: "cloud.library.create", input: LibraryArgs, result: null } | - { key: "cloud.library.join", input: string, result: LibraryConfigWrapped } | - { key: "cloud.library.sync", input: LibraryArgs, result: null } | - { key: "cloud.locations.create", input: string, result: CloudLocation } | - { key: "cloud.locations.remove", input: string, result: CloudLocation } | - { key: "cloud.locations.testing", input: TestingParams, result: null } | - { key: "cloud.setApiOrigin", input: string, result: null } | - { key: "ephemeralFiles.copyFiles", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.createFile", input: LibraryArgs, result: string } | - { key: "ephemeralFiles.createFolder", input: LibraryArgs, result: string } | - { key: "ephemeralFiles.cutFiles", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.deleteFiles", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.moveToTrash", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.renameFile", input: LibraryArgs, result: null } | - { key: "files.convertImage", input: LibraryArgs, result: null } | - { key: "files.copyFiles", input: LibraryArgs, result: null } | - { key: "files.createFile", input: LibraryArgs, result: string } | - { key: "files.createFolder", input: LibraryArgs, result: string } | - { key: "files.cutFiles", input: LibraryArgs, result: null } | - { key: "files.deleteFiles", input: LibraryArgs, result: null } | - { key: "files.eraseFiles", input: LibraryArgs, result: null } | - { key: "files.moveToTrash", input: LibraryArgs, result: null } | - { key: "files.removeAccessTime", input: LibraryArgs, result: null } | - { key: "files.renameFile", input: LibraryArgs, result: null } | - { key: "files.setFavorite", input: LibraryArgs, result: null } | - { key: "files.setNote", input: LibraryArgs, result: null } | - { key: "files.updateAccessTime", input: LibraryArgs, result: null } | - { key: "invalidation.test-invalidate-mutation", input: LibraryArgs, result: null } | - { key: "jobs.cancel", input: LibraryArgs, result: null } | - { key: "jobs.clear", input: LibraryArgs, result: null } | - { key: "jobs.clearAll", input: LibraryArgs, result: null } | - { key: 
"jobs.generateLabelsForLocation", input: LibraryArgs, result: null } | - { key: "jobs.generateThumbsForLocation", input: LibraryArgs, result: null } | - { key: "jobs.identifyUniqueFiles", input: LibraryArgs, result: null } | - { key: "jobs.objectValidator", input: LibraryArgs, result: null } | - { key: "jobs.pause", input: LibraryArgs, result: null } | - { key: "jobs.resume", input: LibraryArgs, result: null } | - { key: "labels.delete", input: LibraryArgs, result: null } | - { key: "library.create", input: CreateLibraryArgs, result: LibraryConfigWrapped } | - { key: "library.delete", input: string, result: null } | - { key: "library.edit", input: EditLibraryArgs, result: null } | - { key: "library.startActor", input: LibraryArgs, result: null } | - { key: "library.stopActor", input: LibraryArgs, result: null } | - { key: "library.vaccumDb", input: LibraryArgs, result: null } | - { key: "locations.addLibrary", input: LibraryArgs, result: number | null } | - { key: "locations.create", input: LibraryArgs, result: number | null } | - { key: "locations.delete", input: LibraryArgs, result: null } | - { key: "locations.fullRescan", input: LibraryArgs, result: null } | - { key: "locations.indexer_rules.create", input: LibraryArgs, result: null } | - { key: "locations.indexer_rules.delete", input: LibraryArgs, result: null } | - { key: "locations.relink", input: LibraryArgs, result: number } | - { key: "locations.subPathRescan", input: LibraryArgs, result: null } | - { key: "locations.update", input: LibraryArgs, result: null } | - { key: "nodes.edit", input: ChangeNodeNameArgs, result: null } | - { key: "nodes.updateThumbnailerPreferences", input: UpdateThumbnailerPreferences, result: null } | - { key: "p2p.acceptSpacedrop", input: [string, string | null], result: null } | - { key: "p2p.cancelSpacedrop", input: string, result: null } | - { key: "p2p.debugConnect", input: RemoteIdentity, result: string } | - { key: "p2p.spacedrop", input: SpacedropArgs, result: string } | - { key: "preferences.update", input: LibraryArgs, result: null } | - { key: "search.saved.create", input: LibraryArgs<{ name: string; target?: SearchTarget; search?: string | null; filters?: string | null; description?: string | null; icon?: string | null }>, result: null } | - { key: "search.saved.delete", input: LibraryArgs, result: null } | - { key: "search.saved.update", input: LibraryArgs<[number, Args]>, result: null } | - { key: "sync.backfill", input: LibraryArgs, result: null } | - { key: "tags.assign", input: LibraryArgs<{ targets: Target[]; tag_id: number; unassign: boolean }>, result: null } | - { key: "tags.create", input: LibraryArgs, result: Tag } | - { key: "tags.delete", input: LibraryArgs, result: null } | - { key: "tags.update", input: LibraryArgs, result: null } | + mutations: + { key: "api.sendFeedback", input: Feedback, result: null } | + { key: "auth.logout", input: never, result: null } | + { key: "backups.backup", input: LibraryArgs, result: string } | + { key: "backups.delete", input: string, result: null } | + { key: "backups.restore", input: string, result: null } | + { key: "cloud.library.create", input: LibraryArgs, result: null } | + { key: "cloud.library.join", input: string, result: LibraryConfigWrapped } | + { key: "cloud.library.sync", input: LibraryArgs, result: null } | + { key: "cloud.locations.create", input: string, result: CloudLocation } | + { key: "cloud.locations.remove", input: string, result: CloudLocation } | + { key: "cloud.locations.testing", input: TestingParams, result: null } | 
+ { key: "cloud.setApiOrigin", input: string, result: null } | + { key: "ephemeralFiles.copyFiles", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.createFile", input: LibraryArgs, result: string } | + { key: "ephemeralFiles.createFolder", input: LibraryArgs, result: string } | + { key: "ephemeralFiles.cutFiles", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.deleteFiles", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.moveToTrash", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.renameFile", input: LibraryArgs, result: null } | + { key: "files.convertImage", input: LibraryArgs, result: null } | + { key: "files.copyFiles", input: LibraryArgs, result: null } | + { key: "files.createFile", input: LibraryArgs, result: string } | + { key: "files.createFolder", input: LibraryArgs, result: string } | + { key: "files.cutFiles", input: LibraryArgs, result: null } | + { key: "files.deleteFiles", input: LibraryArgs, result: null } | + { key: "files.eraseFiles", input: LibraryArgs, result: null } | + { key: "files.moveToTrash", input: LibraryArgs, result: null } | + { key: "files.removeAccessTime", input: LibraryArgs, result: null } | + { key: "files.renameFile", input: LibraryArgs, result: null } | + { key: "files.setFavorite", input: LibraryArgs, result: null } | + { key: "files.setNote", input: LibraryArgs, result: null } | + { key: "files.updateAccessTime", input: LibraryArgs, result: null } | + { key: "invalidation.test-invalidate-mutation", input: LibraryArgs, result: null } | + { key: "jobs.cancel", input: LibraryArgs, result: null } | + { key: "jobs.clear", input: LibraryArgs, result: null } | + { key: "jobs.clearAll", input: LibraryArgs, result: null } | + { key: "jobs.generateThumbsForLocation", input: LibraryArgs, result: string } | + { key: "jobs.identifyUniqueFiles", input: LibraryArgs, result: string } | + { key: "jobs.objectValidator", input: LibraryArgs, result: null } | + { key: "jobs.pause", input: LibraryArgs, result: null } | + { key: "jobs.resume", input: LibraryArgs, result: null } | + { key: "labels.delete", input: LibraryArgs, result: null } | + { key: "library.create", input: CreateLibraryArgs, result: LibraryConfigWrapped } | + { key: "library.delete", input: string, result: null } | + { key: "library.edit", input: EditLibraryArgs, result: null } | + { key: "library.startActor", input: LibraryArgs, result: null } | + { key: "library.stopActor", input: LibraryArgs, result: null } | + { key: "library.vaccumDb", input: LibraryArgs, result: null } | + { key: "locations.addLibrary", input: LibraryArgs, result: number | null } | + { key: "locations.create", input: LibraryArgs, result: number | null } | + { key: "locations.delete", input: LibraryArgs, result: null } | + { key: "locations.fullRescan", input: LibraryArgs, result: string | null } | + { key: "locations.indexer_rules.create", input: LibraryArgs, result: null } | + { key: "locations.indexer_rules.delete", input: LibraryArgs, result: null } | + { key: "locations.relink", input: LibraryArgs, result: number } | + { key: "locations.subPathRescan", input: LibraryArgs, result: string | null } | + { key: "locations.update", input: LibraryArgs, result: null } | + { key: "nodes.edit", input: ChangeNodeNameArgs, result: null } | + { key: "nodes.updateThumbnailerPreferences", input: UpdateThumbnailerPreferences, result: null } | + { key: "p2p.acceptSpacedrop", input: [string, string | null], result: null } | + { key: "p2p.cancelSpacedrop", input: string, result: null } | + { 
key: "p2p.debugConnect", input: RemoteIdentity, result: string } | + { key: "p2p.spacedrop", input: SpacedropArgs, result: string } | + { key: "preferences.update", input: LibraryArgs, result: null } | + { key: "search.saved.create", input: LibraryArgs<{ name: string; target?: SearchTarget; search?: string | null; filters?: string | null; description?: string | null; icon?: string | null }>, result: null } | + { key: "search.saved.delete", input: LibraryArgs, result: null } | + { key: "search.saved.update", input: LibraryArgs<[number, Args]>, result: null } | + { key: "sync.backfill", input: LibraryArgs, result: null } | + { key: "tags.assign", input: LibraryArgs<{ targets: Target[]; tag_id: number; unassign: boolean }>, result: null } | + { key: "tags.create", input: LibraryArgs, result: Tag } | + { key: "tags.delete", input: LibraryArgs, result: null } | + { key: "tags.update", input: LibraryArgs, result: null } | { key: "toggleFeatureFlag", input: BackendFeature, result: null }, - subscriptions: - { key: "auth.loginSession", input: never, result: Response } | - { key: "invalidation.listen", input: never, result: InvalidateOperationEvent[] } | - { key: "jobs.newFilePathIdentified", input: LibraryArgs, result: number[] } | - { key: "jobs.newThumbnail", input: LibraryArgs, result: string[] } | - { key: "jobs.progress", input: LibraryArgs, result: JobProgressEvent } | - { key: "library.actors", input: LibraryArgs, result: { [key in string]: boolean } } | - { key: "locations.online", input: never, result: number[][] } | - { key: "locations.quickRescan", input: LibraryArgs, result: null } | - { key: "notifications.listen", input: never, result: Notification } | - { key: "p2p.events", input: never, result: P2PEvent } | - { key: "search.ephemeralPaths", input: LibraryArgs, result: EphemeralPathsResultItem } | - { key: "sync.active", input: LibraryArgs, result: SyncStatus } | + subscriptions: + { key: "auth.loginSession", input: never, result: Response } | + { key: "invalidation.listen", input: never, result: InvalidateOperationEvent[] } | + { key: "jobs.newFilePathIdentified", input: LibraryArgs, result: number[] } | + { key: "jobs.newThumbnail", input: LibraryArgs, result: ThumbKey } | + { key: "jobs.progress", input: LibraryArgs, result: JobProgressEvent } | + { key: "library.actors", input: LibraryArgs, result: { [key in string]: boolean } } | + { key: "locations.online", input: never, result: number[][] } | + { key: "locations.quickRescan", input: LibraryArgs, result: null } | + { key: "notifications.listen", input: never, result: Notification } | + { key: "p2p.events", input: never, result: P2PEvent } | + { key: "search.ephemeralPaths", input: LibraryArgs, result: EphemeralPathsResultItem } | + { key: "sync.active", input: LibraryArgs, result: SyncStatus } | { key: "sync.newMessage", input: LibraryArgs, result: null } }; @@ -153,7 +152,7 @@ export type AudioProps = { delay: number; padding: number; sample_rate: number | /** * All of the feature flags provided by the core itself. The frontend has it's own set of feature flags! 
- * + * * If you want a variant of this to show up on the frontend it must be added to `backendFeatures` in `useFeatureFlag.tsx` */ export type BackendFeature = "cloudSync" @@ -182,19 +181,19 @@ export type Codec = { kind: string | null; sub_kind: string | null; tag: string export type ColorProfile = "Normal" | "Custom" | "HDRNoOriginal" | "HDRWithOriginal" | "OriginalForHDR" | "Panorama" | "PortraitHDR" | "Portrait" -export type Composite = +export type Composite = /** * The data is present, but we're unable to determine what they mean */ -"Unknown" | +"Unknown" | /** * Not a composite image */ -"False" | +"False" | /** * A general composite image */ -"General" | +"General" | /** * The composite image was captured while shooting */ @@ -265,7 +264,7 @@ export type ExifDataOrder = { field: "epochTime"; value: SortOrder } export type ExifMetadata = { resolution: Resolution; date_taken: MediaDate | null; location: MediaLocation | null; camera_data: CameraData; artist: string | null; description: string | null; copyright: string | null; exif_version: string | null } -export type ExplorerItem = { type: "Path"; thumbnail: string[] | null; has_created_thumbnail: boolean; item: FilePathForFrontend } | { type: "Object"; thumbnail: string[] | null; has_created_thumbnail: boolean; item: ObjectWithFilePaths } | { type: "NonIndexedPath"; thumbnail: string[] | null; has_created_thumbnail: boolean; item: NonIndexedPathItem } | { type: "Location"; item: Location } | { type: "SpacedropPeer"; item: PeerMetadata } | { type: "Label"; thumbnails: string[][]; item: LabelWithObjects } +export type ExplorerItem = { type: "Path"; thumbnail: ThumbKey | null; has_created_thumbnail: boolean; item: FilePathForFrontend } | { type: "Object"; thumbnail: ThumbKey | null; has_created_thumbnail: boolean; item: ObjectWithFilePaths } | { type: "NonIndexedPath"; thumbnail: ThumbKey | null; has_created_thumbnail: boolean; item: NonIndexedPathItem } | { type: "Location"; item: Location } | { type: "SpacedropPeer"; item: PeerMetadata } | { type: "Label"; thumbnails: ThumbKey[]; item: LabelWithObjects } export type ExplorerLayout = "grid" | "list" | "media" @@ -299,46 +298,46 @@ export type FilePathOrder = { field: "name"; value: SortOrder } | { field: "size export type FilePathSearchArgs = { take?: number | null; orderAndPagination?: OrderAndPagination | null; filters?: SearchFilterArgs[]; groupDirectories?: boolean } -export type Flash = { +export type Flash = { /** * Specifies how flash was used (on, auto, off, forced, onvalid) - * + * * [`FlashMode::Unknown`] isn't a valid EXIF state, but it's included as the default, * just in case we're unable to correctly match it to a known (valid) state. - * + * * This type should only ever be evaluated if flash EXIF data is present, so having this as a non-option shouldn't be an issue. */ -mode: FlashMode; +mode: FlashMode; /** * Did the flash actually fire? */ -fired: boolean | null; +fired: boolean | null; /** * Did flash return to the camera? (Unsure of the meaning) */ -returned: boolean | null; +returned: boolean | null; /** * Was red eye reduction used? 
*/ red_eye_reduction: boolean | null } -export type FlashMode = +export type FlashMode = /** * The data is present, but we're unable to determine what they mean */ -"Unknown" | +"Unknown" | /** * `FLash` was on */ -"On" | +"On" | /** * Flash was off */ -"Off" | +"Off" | /** * Flash was set to automatically fire in certain conditions */ -"Auto" | +"Auto" | /** * Flash was forcefully fired */ @@ -348,8 +347,6 @@ export type FromPattern = { pattern: string; replace_all: boolean } export type FullRescanArgs = { location_id: number; reidentify_objects: boolean } -export type GenerateLabelsForLocationArgs = { id: number; path: string; regenerate?: boolean } - export type GenerateThumbsForLocationArgs = { id: number; path: string; regenerate?: boolean } export type GetAll = { backups: Backup[]; directory: string } @@ -365,10 +362,10 @@ export type IndexerRule = { id: number; pub_id: number[]; name: string | null; d /** * `IndexerRuleCreateArgs` is the argument received from the client using rspc to create a new indexer rule. * Note that `rules` field is a vector of tuples of `RuleKind` and `parameters`. - * + * * In case of `RuleKind::AcceptFilesByGlob` or `RuleKind::RejectFilesByGlob`, it will be a * vector of strings containing a glob patterns. - * + * * In case of `RuleKind::AcceptIfChildrenDirectoriesArePresent` or `RuleKind::RejectIfChildrenDirectoriesArePresent` the * `parameters` field must be a vector of strings containing the names of the directories. */ @@ -402,19 +399,19 @@ export type LibraryArgs = { library_id: string; arg: T } /** * LibraryConfig holds the configuration for a specific library. This is stored as a '{uuid}.sdlibrary' file. */ -export type LibraryConfig = { +export type LibraryConfig = { /** * name is the display name of the library. This is used in the UI and is set by the user. */ -name: LibraryName; +name: LibraryName; /** * description is a user set description of the library. This is used in the UI and is set by the user. */ -description: string | null; +description: string | null; /** * id of the current instance so we know who this `.db` is. This can be looked up within the `Instance` table. */ -instance_id: number; +instance_id: number; /** * cloud_id is the ID of the cloud library this library is linked to. * If this is set we can assume the library is synced with the Cloud. @@ -450,7 +447,7 @@ export type LocationSettings = { explorer: ExplorerSettings } * `LocationUpdateArgs` is the argument received from the client using `rspc` to update a location. * It contains the id of the location to be updated, possible a name to change the current location's name * and a vector of indexer rules ids to add or remove from the location. - * + * * It is important to note that only the indexer rule ids in this vector will be used from now on. * Old rules that aren't in this vector will be purged. */ @@ -474,13 +471,13 @@ export type Metadata = { album: string | null; album_artist: string | null; arti export type NodeConfigP2P = { discovery?: P2PDiscoveryState; port: Port; ipv4: boolean; ipv6: boolean; remote_access: boolean } -export type NodePreferences = { thumbnailer: ThumbnailerPreferences } +export type NodePreferences = Record -export type NodeState = ({ +export type NodeState = ({ /** * id is a unique identifier for the current node. Each node has a public identifier (this one) and is given a local id for each library (done within the library code). */ -id: string; +id: string; /** * name is the display name of the current node. 
This is set by the user and is shown in the UI. // TODO: Length validation so it can fit in DNS record */ @@ -585,7 +582,7 @@ export type SetFavoriteArgs = { id: number; favorite: boolean } export type SetNoteArgs = { id: number; note: string | null } -export type SingleInvalidateOperationEvent = { +export type SingleInvalidateOperationEvent = { /** * This fields are intentionally private. */ @@ -621,7 +618,11 @@ export type TestingParams = { id: string; path: string } export type TextMatch = { contains: string } | { startsWith: string } | { endsWith: string } | { equals: string } -export type ThumbnailerPreferences = { background_processing_percentage: number } +/** + * This type is used to pass the relevant data to the frontend so it can request the thumbnail. + * Tt supports extending the shard hex to support deeper directory structures in the future + */ +export type ThumbKey = { shard_hex: string; cas_id: string; base_directory_str: string } export type UpdateThumbnailerPreferences = { background_processing_percentage: number } diff --git a/packages/client/src/lib/explorerItem.ts b/packages/client/src/lib/explorerItem.ts index 5b2dfaab7d2d..8c54f577fc18 100644 --- a/packages/client/src/lib/explorerItem.ts +++ b/packages/client/src/lib/explorerItem.ts @@ -1,4 +1,4 @@ -import type { ExplorerItem } from '../core'; +import type { ExplorerItem, ThumbKey } from '../core'; import { getItemFilePath, getItemLocation, getItemObject } from '../utils'; import { humanizeSize } from './humanizeSize'; import { ObjectKind, ObjectKindKey } from './objectKind'; @@ -19,8 +19,8 @@ export interface ItemData { dateModified: string | null; dateAccessed: string | null; dateTaken: string | null; - thumbnailKey: string[]; // default behavior is to render a single thumbnail - thumbnailKeys?: string[][]; // if set, we can render multiple thumbnails + thumbnailKey: ThumbKey | null; // default behavior is to render a single thumbnail + thumbnailKeys?: ThumbKey[]; // if set, we can render multiple thumbnails hasLocalThumbnail: boolean; // this is overwritten when new thumbnails are generated customIcon: string | null; } @@ -102,7 +102,7 @@ export function getExplorerItemData(data?: ExplorerItem | null): ItemData { case 'Label': { itemData.name = data.item.name; itemData.customIcon = 'Tag'; - itemData.thumbnailKey = data.thumbnails[0] ?? []; + itemData.thumbnailKey = data.thumbnails[0] ?? 
null; itemData.thumbnailKeys = data.thumbnails; itemData.hasLocalThumbnail = !!data.thumbnails; itemData.kind = 'Label'; @@ -135,7 +135,7 @@ function getDefaultItemData(kind: ObjectKindKey = 'Unknown'): ItemData { dateModified: null, dateAccessed: null, dateTaken: null, - thumbnailKey: [], + thumbnailKey: null, hasLocalThumbnail: false, customIcon: null }; From 914ac94f0d5031e9f48f0c6d02c2d5e407b1f358 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 14 May 2024 15:46:46 -0300 Subject: [PATCH 03/33] Updating some deps due to crashes and bugs --- Cargo.lock | 185 ++++++++++++++++++++++++++++++----------------------- Cargo.toml | 42 ++++++------ 2 files changed, 127 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c743a1621d2..43b119732530 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -369,13 +369,13 @@ dependencies = [ [[package]] name = "async-channel" -version = "2.1.1" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ca33f4bc4ed1babef42cad36cc1f51fa88be00420404e5b1e80ab1b18f7678c" +checksum = "9f2776ead772134d55b62dd45e59a79e21612d85d0af729b8b7d3967d601a62a" dependencies = [ "concurrent-queue", - "event-listener 4.0.1", - "event-listener-strategy 0.4.0", + "event-listener 5.3.0", + "event-listener-strategy 0.5.2", "futures-core", "pin-project-lite", ] @@ -535,9 +535,9 @@ checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799" [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", @@ -951,7 +951,7 @@ dependencies = [ "aws-smithy-types", "bytes", "fastrand 2.0.1", - "h2", + "h2 0.3.22", "http 0.2.11", "http-body 0.4.6", "hyper 0.14.28", @@ -1944,9 +1944,9 @@ dependencies = [ [[package]] name = "concurrent-queue" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" +checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" dependencies = [ "crossbeam-utils", ] @@ -3916,6 +3916,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "816ec7294445779408f36fe57bc5b7fc1cf59664059096c65f905c1c61f58069" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 1.1.0", + "indexmap 2.2.1", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.3.1" @@ -4248,14 +4267,14 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.3.22", "http 0.2.11", "http-body 0.4.6", "httparse", "httpdate", "itoa 1.0.10", "pin-project-lite", - "socket2 0.4.10", + "socket2 0.5.5", "tokio", "tower-service", "tracing", @@ -4271,6 +4290,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2 0.4.4", "http 1.1.0", "http-body 1.0.0", "httparse", @@ -4327,6 +4347,22 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "hyper-tls" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" +dependencies = [ + "bytes", + "http-body-util", + "hyper 1.3.1", + 
"hyper-util", + "native-tls", + "tokio", + "tokio-native-tls", + "tower-service", +] + [[package]] name = "hyper-util" version = "0.1.3" @@ -4713,9 +4749,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -4974,9 +5010,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.153" +version = "0.2.154" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +checksum = "ae743338b92ff9146ce83992f766a31066a91a8c84a45e0e9f21e7cf6de6d346" [[package]] name = "libdbus-sys" @@ -6326,11 +6362,11 @@ checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" [[package]] name = "normpath" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec60c60a693226186f5d6edf073232bfb6464ed97eb22cf3b01c1e8198fd97f5" +checksum = "5831952a9476f2fed74b77d74182fa5ddc4d21c72ec45a333b250e3ed0272804" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -7258,9 +7294,9 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" [[package]] name = "pin-utils" @@ -8149,7 +8185,7 @@ dependencies = [ "built", "cfg-if", "interpolate_name", - "itertools 0.12.0", + "itertools 0.12.1", "libc", "libfuzzer-sys", "log", @@ -8302,9 +8338,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick 1.1.2", "memchr", @@ -8380,11 +8416,11 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", + "h2 0.3.22", "http 0.2.11", "http-body 0.4.6", "hyper 0.14.28", - "hyper-tls", + "hyper-tls 0.5.0", "ipnet", "js-sys", "log", @@ -8399,12 +8435,10 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", - "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams 0.3.0", "web-sys", "winreg 0.50.0", ] @@ -8417,18 +8451,23 @@ checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" dependencies = [ "base64 0.22.1", "bytes", + "encoding_rs", + "futures-channel", "futures-core", "futures-util", + "h2 0.4.4", "http 1.1.0", "http-body 1.0.0", "http-body-util", "hyper 1.3.1", "hyper-rustls 0.26.0", + "hyper-tls 0.6.0", "hyper-util", "ipnet", "js-sys", "log", "mime", + "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -8439,14 +8478,16 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper", + "system-configuration", "tokio", + "tokio-native-tls", "tokio-rustls 0.25.0", "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", - "wasm-streams 0.4.0", + "wasm-streams", "web-sys", "webpki-roots 0.26.1", "winreg 0.52.0", @@ -8554,9 +8595,9 @@ dependencies = [ [[package]] name = "rmp" 
-version = "0.8.12" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9860a6cc38ed1da53456442089b4dfa35e7cedaa326df63017af88385e6b20" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" dependencies = [ "byteorder", "num-traits", @@ -8565,9 +8606,9 @@ dependencies = [ [[package]] name = "rmp-serde" -version = "1.1.2" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bffea85eea980d8a74453e5d02a8d93028f3c34725de143085a844ebe953258a" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" dependencies = [ "byteorder", "rmp", @@ -8576,9 +8617,9 @@ dependencies = [ [[package]] name = "rmpv" -version = "1.0.1" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e0e0214a4a2b444ecce41a4025792fc31f77c7bb89c46d253953ea8c65701ec" +checksum = "58450723cd9ee93273ce44a20b6ec4efe17f8ed2e3631474387bfdecf18bb2a9" dependencies = [ "num-traits", "rmp", @@ -8985,7 +9026,7 @@ dependencies = [ "once_cell", "ort", "prisma-client-rust", - "reqwest 0.11.23", + "reqwest 0.12.4", "rmp-serde", "rmpv", "sd-core-file-path-helper", @@ -9021,7 +9062,7 @@ name = "sd-cloud-api" version = "0.1.0" dependencies = [ "base64 0.21.7", - "reqwest 0.11.23", + "reqwest 0.12.4", "rmpv", "rspc", "sd-p2p", @@ -9065,7 +9106,7 @@ dependencies = [ "icrate", "image", "int-enum", - "itertools 0.12.0", + "itertools 0.12.1", "libc", "mini-moka", "normpath", @@ -9077,7 +9118,7 @@ dependencies = [ "plist", "prisma-client-rust", "regex", - "reqwest 0.11.23", + "reqwest 0.12.4", "rmp", "rmp-serde", "rmpv", @@ -9159,7 +9200,7 @@ dependencies = [ "futures-concurrency", "globset", "image", - "itertools 0.12.0", + "itertools 0.12.1", "lending-stream", "once_cell", "prisma-client-rust", @@ -9286,7 +9327,7 @@ dependencies = [ "anyhow", "cargo_metadata 0.18.1", "clap", - "reqwest 0.11.23", + "reqwest 0.12.4", "serde", "serde_json", ] @@ -9473,7 +9514,7 @@ dependencies = [ "mdns-sd", "pin-project-lite", "rand_core 0.6.4", - "reqwest 0.11.23", + "reqwest 0.12.4", "serde", "sha256", "specta", @@ -9518,7 +9559,7 @@ version = "0.0.1" dependencies = [ "hex", "libp2p", - "reqwest 0.11.23", + "reqwest 0.12.4", "serde", "serde_json", "tokio", @@ -9721,9 +9762,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.197" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" dependencies = [ "serde_derive", ] @@ -9767,9 +9808,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.201" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" dependencies = [ "proc-macro2", "quote", @@ -9789,9 +9830,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" dependencies = [ "indexmap 2.2.1", "itoa 1.0.10", @@ -10360,7 +10401,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" dependencies = [ - "itertools 0.12.0", + "itertools 0.12.1", "nom", "unicode_categories", ] @@ -10459,9 +10500,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strum" -version = "0.25.0" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ "phf 0.10.1", "strum_macros", @@ -10469,9 +10510,9 @@ dependencies = [ [[package]] name = "strum_macros" -version = "0.25.3" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +checksum = "c6cf59daf282c0a494ba14fd21610a0325f9f90ec9d1231dea26bcb1d696c946" dependencies = [ "heck 0.4.1", "proc-macro2", @@ -11064,18 +11105,18 @@ checksum = "8eaa81235c7058867fa8c0e7314f33dcce9c215f535d1913822a2b3f5e289f3c" [[package]] name = "thiserror" -version = "1.0.57" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "579e9083ca58dd9dcf91a9923bb9054071b9ebbd800b342194c9feb0ee89fc18" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "e2470041c06ec3ac1ab38d0356a6119054dedaea53e12fbefc0de730a1c08524" dependencies = [ "proc-macro2", "quote", @@ -11185,9 +11226,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", @@ -11247,9 +11288,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" dependencies = [ "futures-core", "pin-project-lite", @@ -11271,9 +11312,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", @@ -11281,7 +11322,6 @@ dependencies = [ "futures-sink", "pin-project-lite", "tokio", - "tracing", ] [[package]] @@ -11954,9 +11994,9 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ "getrandom 0.2.12", "serde", @@ -12153,19 +12193,6 @@ version = "0.2.89" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" -[[package]] -name = "wasm-streams" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7" -dependencies = [ - "futures-util", - "js-sys", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "wasm-streams" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 572ca0e8090a..38ebb83dbc74 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,8 +44,8 @@ swift-rs = { version = "1.0.6" } # Third party dependencies used by one or more of our crates anyhow = "1.0.75" -async-channel = "2.0.0" -async-trait = "0.1.77" +async-channel = "2.3.0" +async-trait = "0.1.80" axum = "=0.6.20" base64 = "0.21.5" blake3 = "1.5.0" @@ -53,38 +53,38 @@ chrono = "0.4.38" clap = "4.4.7" futures = "0.3.30" futures-concurrency = "7.6.0" -globset = "^0.4.13" +globset = "0.4.14" hex = "0.4.3" http = "0.2.9" image = "0.25.1" -itertools = "0.12.0" +itertools = "0.12.1" lending-stream = "1.0.0" -libc = "0.2" -normpath = "1.1.1" +libc = "0.2.154" +normpath = "1.2.0" once_cell = "1.19.0" -pin-project-lite = "0.2.13" +pin-project-lite = "0.2.14" rand = "0.8.5" rand_chacha = "0.3.1" -regex = "1.10.2" -reqwest = "0.11.22" -rmp-serde = "1.1.2" -rmpv = { version = "^1.0.1", features = ["with-serde"] } -serde = "1.0" -serde_json = "1.0" +regex = "1.10.4" +reqwest = "0.12.4" +rmp-serde = "1.3.0" +rmpv = { version = "1.3.0", features = ["with-serde"] } +serde = "1.0.201" +serde_json = "1.0.117" static_assertions = "1.1.0" -strum = "0.25" -strum_macros = "0.25" -tempfile = "3.8.1" -thiserror = "1.0.50" -tokio = "1.36.0" -tokio-stream = "0.1.14" -tokio-util = "0.7.10" +strum = "0.26.2" +strum_macros = "0.26.2" +tempfile = "3.10.1" +thiserror = "1.0.60" +tokio = "1.37.0" +tokio-stream = "0.1.15" +tokio-util = "0.7.11" tracing = "0.1.40" tracing-subscriber = "0.3.18" tracing-appender = "0.2.3" tracing-test = "^0.2.4" uhlc = "=0.5.2" -uuid = "1.5.0" +uuid = "1.8.0" webp = "0.3.0" [patch.crates-io] From a300a9c26f9ef020bdfdeedbf95650ef47895af1 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 14 May 2024 15:56:25 -0300 Subject: [PATCH 04/33] Exposing non critical errors to frontend --- .../heavy-lifting/src/file_identifier/mod.rs | 6 +-- .../tasks/extract_file_metadata.rs | 15 ++++-- core/crates/heavy-lifting/src/indexer/mod.rs | 15 +++--- .../heavy-lifting/src/indexer/tasks/walker.rs | 48 ++++++++++------- .../heavy-lifting/src/job_system/job.rs | 11 ++-- .../heavy-lifting/src/job_system/report.rs | 54 ++++++++----------- core/crates/heavy-lifting/src/lib.rs | 14 +++-- .../helpers/exif_media_data.rs | 4 +- .../helpers/ffmpeg_media_data.rs | 4 +- .../media_processor/helpers/thumbnailer.rs | 28 ++++++---- .../heavy-lifting/src/media_processor/mod.rs | 12 +++-- .../tasks/media_data_extractor.rs | 26 ++++----- .../src/media_processor/tasks/thumbnailer.rs | 48 +++++++++-------- core/src/location/mod.rs | 4 +- packages/client/src/core.ts | 30 +++++++++-- 15 files changed, 179 insertions(+), 140 deletions(-) diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index 996cd0e3aef5..15bccde154a3 100644 --- a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -25,7 +25,6 @@ use cas_id::generate_cas_id; pub use job::FileIdentifier; pub 
use shallow::shallow; - // we break these tasks into chunks of 100 to improve performance const CHUNK_SIZE: usize = 100; @@ -54,8 +53,9 @@ impl From for rspc::Error { } } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] -pub enum NonCriticalError { +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] +pub enum NonCriticalFileIdentifierError { #[error("failed to extract file metadata: {0}")] FailedToExtractFileMetadata(String), #[cfg(target_os = "windows")] diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs index 06b57e98d3d8..3bf36cc21ac3 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs @@ -208,15 +208,17 @@ fn handle_non_critical_errors( // Handle case where file is on-demand (NTFS only) if e.source.raw_os_error().map_or(false, |code| code == 362) { errors.push( - file_identifier::NonCriticalError::FailedToExtractMetadataFromOnDemandFile( + file_identifier::NonCriticalFileIdentifierError::FailedToExtractMetadataFromOnDemandFile( formatted_error, ) .into(), ); } else { errors.push( - file_identifier::NonCriticalError::FailedToExtractFileMetadata(formatted_error) - .into(), + file_identifier::NonCriticalFileIdentifierError::FailedToExtractFileMetadata( + formatted_error, + ) + .into(), ); } } @@ -224,7 +226,10 @@ fn handle_non_critical_errors( #[cfg(not(target_os = "windows"))] { errors.push( - file_identifier::NonCriticalError::FailedToExtractFileMetadata(formatted_error).into(), + file_identifier::NonCriticalFileIdentifierError::FailedToExtractFileMetadata( + formatted_error, + ) + .into(), ); } } @@ -242,7 +247,7 @@ fn try_iso_file_path_extraction( .map_err(|e| { error!("Failed to extract isolated file path data: {e:#?}"); errors.push( - file_identifier::NonCriticalError::FailedToExtractIsolatedFilePathData(format!( + file_identifier::NonCriticalFileIdentifierError::FailedToExtractIsolatedFilePathData(format!( "" )) .into(), diff --git a/core/crates/heavy-lifting/src/indexer/mod.rs b/core/crates/heavy-lifting/src/indexer/mod.rs index 3d2cb8a49475..8d6ad4e3d9fb 100644 --- a/core/crates/heavy-lifting/src/indexer/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/mod.rs @@ -87,8 +87,9 @@ impl From for rspc::Error { } } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] -pub enum NonCriticalError { +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] +pub enum NonCriticalIndexerError { #[error("failed to read directory entry: {0}")] FailedDirectoryEntry(String), #[error("failed to fetch metadata: {0}")] @@ -278,7 +279,7 @@ pub async fn reverse_update_directories_sizes( IsolatedFilePathData::try_from(file_path) .map_err(|e| { errors.push( - NonCriticalError::MissingFilePathData(format!( + NonCriticalIndexerError::MissingFilePathData(format!( "Found a file_path missing data: , error: {e:#?}", from_bytes_to_uuid(&pub_id) )) @@ -370,7 +371,7 @@ async fn compute_sizes( } } else { errors.push( - NonCriticalError::MissingFilePathData(format!( + NonCriticalIndexerError::MissingFilePathData(format!( "Corrupt database possessing a file_path entry without materialized_path: ", from_bytes_to_uuid(&file_path.pub_id) )) @@ -434,7 +435,7 @@ impl walker::WalkerDBProxy for WalkerDBProxy { &self, parent_iso_file_path: 
&IsolatedFilePathData<'_>, unique_location_id_materialized_path_name_extension_params: Vec, - ) -> Result, NonCriticalError> { + ) -> Result, NonCriticalIndexerError> { // NOTE: This batch size can be increased if we wish to trade memory for more performance const BATCH_SIZE: i64 = 1000; @@ -460,7 +461,7 @@ impl walker::WalkerDBProxy for WalkerDBProxy { .flat_map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id)) .collect::>() }) - .map_err(|e| NonCriticalError::FetchAlreadyExistingFilePathIds(e.to_string()))?; + .map_err(|e| NonCriticalIndexerError::FetchAlreadyExistingFilePathIds(e.to_string()))?; let mut to_remove = vec![]; let mut cursor = 1; @@ -483,7 +484,7 @@ impl walker::WalkerDBProxy for WalkerDBProxy { .select(file_path_pub_and_cas_ids::select()) .exec() .await - .map_err(|e| NonCriticalError::FetchFilePathsToRemove(e.to_string()))?; + .map_err(|e| NonCriticalIndexerError::FetchFilePathsToRemove(e.to_string()))?; #[allow(clippy::cast_possible_truncation)] // Safe because we are using a constant let should_stop = found.len() < BATCH_SIZE as usize; diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker.rs index f99bc2fab1e7..5e568fde7394 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker.rs @@ -117,8 +117,9 @@ pub trait WalkerDBProxy: Clone + Send + Sync + fmt::Debug + 'static { &self, parent_iso_file_path: &IsolatedFilePathData<'_>, unique_location_id_materialized_path_name_extension_params: Vec, - ) -> impl Future, indexer::NonCriticalError>> - + Send; + ) -> impl Future< + Output = Result, indexer::NonCriticalIndexerError>, + > + Send; } #[derive(Debug, Serialize, Deserialize)] @@ -161,7 +162,10 @@ struct InnerMetadata { } impl InnerMetadata { - fn new(path: impl AsRef, metadata: &Metadata) -> Result { + fn new( + path: impl AsRef, + metadata: &Metadata, + ) -> Result { let FilePathMetadata { inode, size_in_bytes, @@ -169,7 +173,7 @@ impl InnerMetadata { modified_at, hidden, } = FilePathMetadata::from_path(path, metadata) - .map_err(|e| indexer::NonCriticalError::FilePathMetadata(e.to_string()))?; + .map_err(|e| indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()))?; Ok(Self { is_dir: metadata.is_dir(), @@ -575,7 +579,7 @@ where } Err(e) => { errors.push(NonCriticalError::Indexer( - indexer::NonCriticalError::FailedDirectoryEntry( + indexer::NonCriticalIndexerError::FailedDirectoryEntry( FileIOError::from((&path, e)).to_string(), ), )); @@ -816,7 +820,9 @@ async fn keep_walking( db_proxy.clone(), dispatcher.clone(), ) - .map_err(|e| indexer::NonCriticalError::DispatchKeepWalking(e.to_string())) + .map_err(|e| { + indexer::NonCriticalIndexerError::DispatchKeepWalking(e.to_string()) + }) }) .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()), ) @@ -836,7 +842,7 @@ async fn collect_metadata( fs::metadata(¤t_path) .await .map_err(|e| { - indexer::NonCriticalError::Metadata( + indexer::NonCriticalIndexerError::Metadata( FileIOError::from((¤t_path, e)).to_string(), ) }) @@ -869,7 +875,7 @@ async fn apply_indexer_rules( .map(|acceptance_per_rule_kind| { (current_path, (metadata, acceptance_per_rule_kind)) }) - .map_err(|e| indexer::NonCriticalError::IndexerRule(e.to_string())) + .map_err(|e| indexer::NonCriticalIndexerError::IndexerRule(e.to_string())) }) .collect::>() .join() @@ -960,7 +966,7 @@ async fn process_rules_results( fs::metadata(&ancestor_path) .await .map_err(|e| { - 
indexer::NonCriticalError::Metadata( + indexer::NonCriticalIndexerError::Metadata( FileIOError::from((&ancestor_path, e)).to_string(), ) }) @@ -973,7 +979,9 @@ async fn process_rules_results( } .into() }) - .map_err(|e| indexer::NonCriticalError::FilePathMetadata(e.to_string())) + .map_err(|e| { + indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()) + }) }) }) .collect::>() @@ -1040,10 +1048,9 @@ fn accept_ancestors( .skip(1) // Skip the current directory as it was already indexed .take_while(|&ancestor| ancestor != root) { - if let Ok(iso_file_path) = iso_file_path_factory - .build(ancestor, true) - .map_err(|e| errors.push(indexer::NonCriticalError::IsoFilePath(e.to_string()).into())) - { + if let Ok(iso_file_path) = iso_file_path_factory.build(ancestor, true).map_err(|e| { + errors.push(indexer::NonCriticalIndexerError::IsoFilePath(e.to_string()).into()); + }) { match accepted_ancestors.entry(iso_file_path) { Entry::Occupied(_) => { // If we already accepted this ancestor, then it will contain @@ -1111,7 +1118,8 @@ async fn gather_file_paths_to_remove( ) }) .map_err(|e| { - errors.push(indexer::NonCriticalError::IsoFilePath(e.to_string()).into()); + errors + .push(indexer::NonCriticalIndexerError::IsoFilePath(e.to_string()).into()); }) .ok() }) @@ -1133,11 +1141,11 @@ async fn gather_file_paths_to_remove( mod tests { use super::*; + use futures::stream::FuturesUnordered; use sd_core_indexer_rules::{IndexerRule, RulePerKind}; use sd_task_system::{TaskOutput, TaskStatus, TaskSystem}; use chrono::Utc; - use futures_concurrency::future::FutureGroup; use globset::{Glob, GlobSetBuilder}; use lending_stream::{LendingStream, StreamExt}; use tempfile::{tempdir, TempDir}; @@ -1175,7 +1183,7 @@ mod tests { &self, _: &IsolatedFilePathData<'_>, _: Vec, - ) -> Result, indexer::NonCriticalError> { + ) -> Result, indexer::NonCriticalIndexerError> { Ok(vec![]) } } @@ -1330,9 +1338,9 @@ mod tests { ) .await; - let mut group = FutureGroup::new(); + let group = FuturesUnordered::new(); - group.insert(handle); + group.push(handle); let mut group = group.lend_mut(); @@ -1359,7 +1367,7 @@ mod tests { ancestors.extend(accepted_ancestors); for handle in handles { - group.insert(handle); + group.push(handle); } } diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index 2ff647e0dda1..e8d73996718d 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -243,14 +243,11 @@ impl JobOutput { debug!("Job completed", report.id, report.name); } else { report.status = Status::CompletedWithErrors; - report.non_critical_errors = non_critical_errors - .iter() - .map(ToString::to_string) - .collect(); + report.non_critical_errors.extend(non_critical_errors); warn!( - "Job completed with errors: {non_critical_errors:#?}", - report.id, report.name + "Job completed with errors: {:#?}", + report.id, report.name, report.non_critical_errors ); } @@ -266,7 +263,7 @@ impl JobOutput { job_name: report.name, data, metadata: report.metadata.clone(), - non_critical_errors, + non_critical_errors: report.non_critical_errors.clone(), } } } diff --git a/core/crates/heavy-lifting/src/job_system/report.rs b/core/crates/heavy-lifting/src/job_system/report.rs index f6d9dab17cbd..7bc17e56c821 100644 --- a/core/crates/heavy-lifting/src/job_system/report.rs +++ b/core/crates/heavy-lifting/src/job_system/report.rs @@ -10,6 +10,8 @@ use specta::Type; use strum::ParseError; use tracing::error; +use 
crate::NonCriticalError; + use super::{job::JobName, JobId}; #[derive(thiserror::Error, Debug)] @@ -22,10 +24,8 @@ pub enum ReportError { InvalidJobStatusInt(i32), #[error("job not found in database: ")] MissingReport(JobId), - #[error("serialization error: {0}")] - Serialization(#[from] rmp_serde::encode::Error), - #[error("deserialization error: {0}")] - Deserialization(#[from] rmp_serde::decode::Error), + #[error("json error: {0}")] + Json(#[from] serde_json::Error), #[error(transparent)] MissingField(#[from] MissingFieldError), #[error("failed to parse job name from database: {0}")] @@ -44,10 +44,7 @@ impl From for rspc::Error { ReportError::MissingReport(_) => { Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e) } - ReportError::Serialization(_) - | ReportError::Deserialization(_) - | ReportError::MissingField(_) - | ReportError::JobNameParse(_) => { + ReportError::Json(_) | ReportError::MissingField(_) | ReportError::JobNameParse(_) => { Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e) } } @@ -55,12 +52,14 @@ impl From for rspc::Error { } #[derive(Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] pub enum ReportMetadata { Input(ReportInputMetadata), Output(ReportOutputMetadata), } #[derive(Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] pub enum ReportInputMetadata { // TODO: Add more variants as needed Location(location::Data), @@ -68,6 +67,7 @@ pub enum ReportInputMetadata { } #[derive(Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] pub enum ReportOutputMetadata { Metrics(HashMap), // TODO: Add more variants as needed @@ -81,7 +81,7 @@ pub struct Report { pub metadata: Vec, pub critical_error: Option, - pub non_critical_errors: Vec, + pub non_critical_errors: Vec, pub created_at: Option>, pub started_at: Option>, @@ -118,25 +118,17 @@ impl TryFrom for Report { name: JobName::from_str(&maybe_missing(data.name, "job.name")?)?, action: data.action, - metadata: data - .metadata - .map(|m| { - rmp_serde::from_slice(&m).unwrap_or_else(|e| { - error!("Failed to deserialize job metadata: {e:#?}"); - vec![] - }) - }) - .unwrap_or_default(), + metadata: if let Some(metadata) = data.metadata { + serde_json::from_slice(&metadata)? + } else { + vec![] + }, critical_error: data.critical_error, - non_critical_errors: data.non_critical_errors.map_or_else( - Default::default, - |non_critical_errors| { - serde_json::from_slice(&non_critical_errors).unwrap_or_else(|e| { - error!("Failed to deserialize job non-critical errors: {e:#?}"); - vec![] - }) - }, - ), + non_critical_errors: if let Some(non_critical_errors) = data.non_critical_errors { + serde_json::from_slice(&non_critical_errors)? 
+ } else { + vec![] + }, created_at: data.date_created.map(DateTime::into), started_at: data.date_started.map(DateTime::into), completed_at: data.date_completed.map(DateTime::into), @@ -209,10 +201,10 @@ impl Report { job::name::set(Some(self.name.to_string())), job::action::set(self.action.clone()), job::date_created::set(Some(now.into())), - job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)), + job::metadata::set(Some(serde_json::to_vec(&self.metadata)?)), job::status::set(Some(self.status as i32)), job::date_started::set(self.started_at.map(Into::into)), - job::task_count::set(Some(1)), + job::task_count::set(Some(0)), job::completed_task_count::set(Some(0)), ], [self @@ -237,10 +229,10 @@ impl Report { vec![ job::status::set(Some(self.status as i32)), job::critical_error::set(self.critical_error.clone()), - job::non_critical_errors::set(Some(rmp_serde::to_vec( + job::non_critical_errors::set(Some(serde_json::to_vec( &self.non_critical_errors, )?)), - job::metadata::set(Some(rmp_serde::to_vec(&self.metadata)?)), + job::metadata::set(Some(serde_json::to_vec(&self.metadata)?)), job::task_count::set(Some(self.task_count)), job::completed_task_count::set(Some(self.completed_task_count)), job::date_started::set(self.started_at.map(Into::into)), diff --git a/core/crates/heavy-lifting/src/lib.rs b/core/crates/heavy-lifting/src/lib.rs index b9401559fd94..5898d3b6caba 100644 --- a/core/crates/heavy-lifting/src/lib.rs +++ b/core/crates/heavy-lifting/src/lib.rs @@ -28,6 +28,8 @@ #![forbid(deprecated_in_future)] #![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] +use file_identifier::NonCriticalFileIdentifierError; +use indexer::NonCriticalIndexerError; use sd_prisma::prisma::file_path; use sd_task_system::TaskSystemError; @@ -41,13 +43,14 @@ pub mod job_system; pub mod media_processor; pub mod utils; -use media_processor::ThumbKey; +use media_processor::{NonCriticalMediaProcessorError, ThumbKey}; pub use job_system::{ job::{ IntoJob, JobContext, JobEnqueuer, JobName, JobOutput, JobOutputData, OuterContext, ProgressUpdate, }, + report::Report, JobId, JobSystem, JobSystemError, }; @@ -77,15 +80,16 @@ impl From for rspc::Error { } } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] pub enum NonCriticalError { // TODO: Add variants as needed #[error(transparent)] - Indexer(#[from] indexer::NonCriticalError), + Indexer(#[from] NonCriticalIndexerError), #[error(transparent)] - FileIdentifier(#[from] file_identifier::NonCriticalError), + FileIdentifier(#[from] NonCriticalFileIdentifierError), #[error(transparent)] - MediaProcessor(#[from] media_processor::NonCriticalError), + MediaProcessor(#[from] NonCriticalMediaProcessorError), } #[repr(i32)] diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs index fcbd020ab455..3cc9de27ed40 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs @@ -91,11 +91,11 @@ fn to_query( pub async fn extract( path: impl AsRef + Send, -) -> Result, media_processor::NonCriticalError> { +) -> Result, media_processor::NonCriticalMediaProcessorError> { let path = path.as_ref(); ExifMetadata::from_path(&path).await.map_err(|e| { - media_data_extractor::NonCriticalError::FailedToExtractImageMediaData( + 
media_data_extractor::NonCriticalMediaDataExtractorError::FailedToExtractImageMediaData( path.to_path_buf(), e.to_string(), ) diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs index f8ead71170fe..da23092bc343 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs @@ -91,11 +91,11 @@ pub const fn can_extract_for_video(video_extension: VideoExtension) -> bool { pub async fn extract( path: impl AsRef + Send, -) -> Result { +) -> Result { let path = path.as_ref(); FFmpegMetadata::from_path(&path).await.map_err(|e| { - media_data_extractor::NonCriticalError::FailedToExtractImageMediaData( + media_data_extractor::NonCriticalMediaDataExtractorError::FailedToExtractImageMediaData( path.to_path_buf(), e.to_string(), ) diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs index 20e7164d8e95..0409b529e5bb 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs @@ -230,7 +230,7 @@ pub async fn generate_thumbnail( should_regenerate: bool, ) -> ( Duration, - Result<(ThumbKey, GenerationStatus), thumbnailer::NonCriticalError>, + Result<(ThumbKey, GenerationStatus), thumbnailer::NonCriticalThumbnailerError>, ) { trace!("Generating thumbnail for {}", path.display()); let start = Instant::now(); @@ -301,15 +301,18 @@ pub async fn generate_thumbnail( async fn generate_image_thumbnail( file_path: impl AsRef + Send, output_path: impl AsRef + Send, -) -> Result<(), thumbnailer::NonCriticalError> { +) -> Result<(), thumbnailer::NonCriticalThumbnailerError> { let file_path = file_path.as_ref().to_path_buf(); let webp = spawn_blocking({ let file_path = file_path.clone(); - move || -> Result<_, thumbnailer::NonCriticalError> { + move || -> Result<_, thumbnailer::NonCriticalThumbnailerError> { let mut img = format_image(&file_path).map_err(|e| { - thumbnailer::NonCriticalError::FormatImage(file_path.clone(), e.to_string()) + thumbnailer::NonCriticalThumbnailerError::FormatImage( + file_path.clone(), + e.to_string(), + ) })?; let (w, h) = img.dimensions(); @@ -340,7 +343,10 @@ async fn generate_image_thumbnail( // Create the WebP encoder for the above image let encoder = Encoder::from_image(&img).map_err(|reason| { - thumbnailer::NonCriticalError::WebPEncoding(file_path, reason.to_string()) + thumbnailer::NonCriticalThumbnailerError::WebPEncoding( + file_path, + reason.to_string(), + ) })?; // Type `WebPMemory` is !Send, which makes the `Future` in this function `!Send`, @@ -351,7 +357,7 @@ async fn generate_image_thumbnail( }) .await .map_err(|e| { - thumbnailer::NonCriticalError::PanicWhileGeneratingThumbnail( + thumbnailer::NonCriticalThumbnailerError::PanicWhileGeneratingThumbnail( file_path.clone(), e.to_string(), ) @@ -361,7 +367,7 @@ async fn generate_image_thumbnail( if let Some(shard_dir) = output_path.parent() { fs::create_dir_all(shard_dir).await.map_err(|e| { - thumbnailer::NonCriticalError::CreateShardDirectory( + thumbnailer::NonCriticalThumbnailerError::CreateShardDirectory( FileIOError::from((shard_dir, e)).to_string(), ) })?; @@ -373,7 +379,7 @@ async fn generate_image_thumbnail( } fs::write(output_path, &webp).await.map_err(|e| { - thumbnailer::NonCriticalError::SaveThumbnail( + 
thumbnailer::NonCriticalThumbnailerError::SaveThumbnail( file_path, FileIOError::from((output_path, e)).to_string(), ) @@ -384,7 +390,7 @@ async fn generate_image_thumbnail( async fn generate_video_thumbnail( file_path: impl AsRef + Send, output_path: impl AsRef + Send, -) -> Result<(), thumbnailer::NonCriticalError> { +) -> Result<(), thumbnailer::NonCriticalThumbnailerError> { use sd_ffmpeg::{to_thumbnail, ThumbnailSize}; let file_path = file_path.as_ref(); @@ -397,7 +403,7 @@ async fn generate_video_thumbnail( ) .await .map_err(|e| { - thumbnailer::NonCriticalError::VideoThumbnailGenerationFailed( + thumbnailer::NonCriticalThumbnailerError::VideoThumbnailGenerationFailed( file_path.to_path_buf(), e.to_string(), ) @@ -415,7 +421,7 @@ pub async fn generate_single_thumbnail( cas_id: String, path: impl AsRef + Send, kind: ThumbnailKind, -) -> Result<(), thumbnailer::NonCriticalError> { +) -> Result<(), thumbnailer::NonCriticalThumbnailerError> { let mut last_single_thumb_generated_guard = LAST_SINGLE_THUMB_GENERATED_LOCK.lock().await; let elapsed = Instant::now() - *last_single_thumb_generated_guard; diff --git a/core/crates/heavy-lifting/src/media_processor/mod.rs b/core/crates/heavy-lifting/src/media_processor/mod.rs index 02ab0481ef1d..5cf401709f1e 100644 --- a/core/crates/heavy-lifting/src/media_processor/mod.rs +++ b/core/crates/heavy-lifting/src/media_processor/mod.rs @@ -33,7 +33,8 @@ pub use helpers::thumbnailer::can_generate_thumbnail_for_video; pub use shallow::shallow; -use self::thumbnailer::NewThumbnailReporter; +use media_data_extractor::NonCriticalMediaDataExtractorError; +use thumbnailer::{NewThumbnailReporter, NonCriticalThumbnailerError}; const BATCH_SIZE: usize = 10; @@ -62,12 +63,13 @@ impl From for rspc::Error { } } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] -pub enum NonCriticalError { +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] +#[serde(rename_all = "snake_case")] +pub enum NonCriticalMediaProcessorError { #[error(transparent)] - MediaDataExtractor(#[from] media_data_extractor::NonCriticalError), + MediaDataExtractor(#[from] NonCriticalMediaDataExtractorError), #[error(transparent)] - Thumbnailer(#[from] thumbnailer::NonCriticalError), + Thumbnailer(#[from] NonCriticalThumbnailerError), } #[derive(Clone)] diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs index fe7d713d10e7..66ecc6a155f0 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs @@ -29,8 +29,8 @@ use std::{ time::Duration, }; -use futures::{FutureExt, StreamExt}; -use futures_concurrency::future::{FutureGroup, Race}; +use futures::{stream::FuturesUnordered, FutureExt, StreamExt}; +use futures_concurrency::future::Race; use serde::{Deserialize, Serialize}; use specta::Type; use tokio::time::Instant; @@ -91,8 +91,10 @@ impl MediaDataExtractor { true } else { output.errors.push( - media_processor::NonCriticalError::from( - NonCriticalError::FilePathMissingObjectId(file_path.id), + media_processor::NonCriticalMediaProcessorError::from( + NonCriticalMediaDataExtractorError::FilePathMissingObjectId( + file_path.id, + ), ) .into(), ); @@ -279,8 +281,8 @@ impl Task for MediaDataExtractor { } } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] -pub enum NonCriticalError { +#[derive(thiserror::Error, Debug, Serialize, 
Deserialize, Type, Clone)] +pub enum NonCriticalMediaDataExtractorError { #[error("failed to extract media data from : {1}", .0.display())] FailedToExtractImageMediaData(PathBuf, String), #[error("file path missing object id: ")] @@ -428,8 +430,8 @@ fn filter_files_to_extract_media_data( IsolatedFilePathData::try_from((location_id, file_path)) .map_err(|e| { errors.push( - media_processor::NonCriticalError::from( - NonCriticalError::FailedToConstructIsolatedFilePathData( + media_processor::NonCriticalMediaProcessorError::from( + NonCriticalMediaDataExtractorError::FailedToConstructIsolatedFilePathData( file_path.id, e.to_string(), ), @@ -455,8 +457,8 @@ fn filter_files_to_extract_media_data( } enum ExtractionOutputKind { - Exif(Result, media_processor::NonCriticalError>), - FFmpeg(Result), + Exif(Result, media_processor::NonCriticalMediaProcessorError>), + FFmpeg(Result), } struct ExtractionOutput { @@ -481,7 +483,7 @@ fn prepare_extraction_futures<'a>( kind: Kind, paths_by_id: &'a HashMap, interrupter: &'a Interrupter, -) -> FutureGroup + 'a> { +) -> FuturesUnordered + 'a> { paths_by_id .iter() .map( @@ -508,7 +510,7 @@ fn prepare_extraction_futures<'a>( ) .race() }) - .collect::>() + .collect::>() } #[inline] diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index 9e41eb14d1ff..e410fbeb84e2 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -39,8 +39,8 @@ use std::{ time::Duration, }; -use futures::{FutureExt, StreamExt}; -use futures_concurrency::future::{FutureGroup, Race}; +use futures::{stream::FuturesUnordered, FutureExt, StreamExt}; +use futures_concurrency::future::Race; use serde::{Deserialize, Serialize}; use specta::Type; @@ -55,9 +55,9 @@ pub trait NewThumbnailReporter: Send + Sync + fmt::Debug + 'static { } #[derive(Debug)] -pub struct Thumbnailer { +pub struct Thumbnailer { id: TaskId, - reporter: Reporter, + reporter: Arc, thumbs_kind: ThumbnailKind, thumbnails_directory_path: Arc, thumbnails_to_generate: HashMap, @@ -68,7 +68,7 @@ pub struct Thumbnailer { } #[async_trait::async_trait] -impl Task for Thumbnailer { +impl Task for Thumbnailer { fn id(&self) -> TaskId { self.id } @@ -124,7 +124,9 @@ impl Task for Thumbnailer { *id, ( THUMBNAIL_GENERATION_TIMEOUT, - Err(NonCriticalError::ThumbnailGenerationTimeout(path.clone())), + Err(NonCriticalThumbnailerError::ThumbnailGenerationTimeout( + path.clone(), + )), ), ) }), @@ -137,14 +139,14 @@ impl Task for Thumbnailer { interrupter.into_future().map(InterruptRace::Interrupted) ) .race()) - .collect::>()); + .collect::>()); while let Some(race_output) = futures.next().await { match race_output { InterruptRace::Processed(out) => process_thumbnail_generation_output( out, *with_priority, - reporter, + reporter.as_ref(), already_processed_ids, output, ), @@ -195,8 +197,8 @@ pub struct Output { pub std_dev_acc: f64, } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type)] -pub enum NonCriticalError { +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] +pub enum NonCriticalThumbnailerError { #[error("file path has no cas_id")] MissingCasId(file_path::id::Type), #[error("failed to extract isolated file path data from file path : {1}")] @@ -217,7 +219,7 @@ pub enum NonCriticalError { ThumbnailGenerationTimeout(PathBuf), } -impl Thumbnailer { +impl Thumbnailer { fn new( thumbs_kind: ThumbnailKind, 
thumbnails_directory_path: Arc, @@ -225,7 +227,7 @@ impl Thumbnailer { errors: Vec, should_regenerate: bool, with_priority: bool, - reporter: Reporter, + reporter: Arc, ) -> Self { Self { id: TaskId::new_v4(), @@ -247,7 +249,7 @@ impl Thumbnailer { pub fn new_ephemeral( thumbnails_directory_path: Arc, thumbnails_to_generate: Vec, - reporter: Reporter, + reporter: Arc, ) -> Self { Self::new( ThumbnailKind::Ephemeral, @@ -279,7 +281,7 @@ impl Thumbnailer { library_id: Uuid, should_regenerate: bool, with_priority: bool, - reporter: Reporter, + reporter: Arc, ) -> Self { let mut errors = Vec::new(); @@ -294,8 +296,8 @@ impl Thumbnailer { IsolatedFilePathData::try_from((location_id, file_path)) .map_err(|e| { errors.push( - media_processor::NonCriticalError::from( - NonCriticalError::FailedToExtractIsolatedFilePathData( + media_processor::NonCriticalMediaProcessorError::from( + NonCriticalThumbnailerError::FailedToExtractIsolatedFilePathData( file_path_id, e.to_string(), ), @@ -307,8 +309,8 @@ impl Thumbnailer { .map(|iso_file_path| (file_path_id, cas_id, iso_file_path)) } else { errors.push( - media_processor::NonCriticalError::from( - NonCriticalError::MissingCasId(file_path.id), + media_processor::NonCriticalMediaProcessorError::from( + NonCriticalThumbnailerError::MissingCasId(file_path.id), ) .into(), ); @@ -351,12 +353,12 @@ struct SaveState { output: Output, } -impl SerializableTask for Thumbnailer { +impl SerializableTask for Thumbnailer { type SerializeError = rmp_serde::encode::Error; type DeserializeError = rmp_serde::decode::Error; - type DeserializeCtx = Reporter; + type DeserializeCtx = Arc; async fn serialize(self) -> Result, Self::SerializeError> { let Self { @@ -418,14 +420,14 @@ type ThumbnailGenerationOutput = ( ThumbnailId, ( Duration, - Result<(ThumbKey, GenerationStatus), NonCriticalError>, + Result<(ThumbKey, GenerationStatus), NonCriticalThumbnailerError>, ), ); fn process_thumbnail_generation_output( (id, (elapsed_time, res)): ThumbnailGenerationOutput, with_priority: bool, - reporter: &impl NewThumbnailReporter, + reporter: &dyn NewThumbnailReporter, already_processed_ids: &mut Vec, Output { generated, @@ -462,7 +464,7 @@ fn process_thumbnail_generation_output( } } Err(e) => { - errors.push(media_processor::NonCriticalError::from(e).into()); + errors.push(media_processor::NonCriticalMediaProcessorError::from(e).into()); *skipped += 1; } } diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index 4154dd477e92..68201dd7a336 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -26,7 +26,7 @@ use sd_sync::*; use sd_utils::{ db::{maybe_missing, MissingFieldError}, error::{FileIOError, NonUtf8PathError}, - msgpack, + msgpack, uuid_to_bytes, }; use std::{ @@ -647,7 +647,7 @@ pub async fn relink_location( metadata.relink(*id, location_path).await?; - let pub_id = metadata.location_pub_id(*id)?.as_ref().to_vec(); + let pub_id = uuid_to_bytes(metadata.location_pub_id(*id)?); let path = location_path .to_str() .map(str::to_string) diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index f1364efbc712..232d1397d541 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -373,13 +373,11 @@ export type IndexerRuleCreateArgs = { name: string; dry_run: boolean; rules: ([R export type InvalidateOperationEvent = { type: "single"; data: SingleInvalidateOperationEvent } | { type: "all" } -export type JobGroup = { id: string; action: string | null; status: JobStatus; created_at: string; jobs: JobReport[] } +export 
type JobGroup = { id: string; action: string | null; status: Status; created_at: string; jobs: Report[] } -export type JobProgressEvent = { id: string; library_id: string; task_count: number; completed_task_count: number; phase: string; message: string; estimated_completion: string } - -export type JobReport = { id: string; name: string; action: string | null; data: number[] | null; metadata: { [key in string]: JsonValue } | null; errors_text: string[]; created_at: string | null; started_at: string | null; completed_at: string | null; parent_id: string | null; status: JobStatus; task_count: number; completed_task_count: number; phase: string; message: string; estimated_completion: string } +export type JobName = "Indexer" | "FileIdentifier" | "MediaProcessor" -export type JobStatus = "Queued" | "Running" | "Completed" | "Canceled" | "Failed" | "Paused" | "CompletedWithErrors" +export type JobProgressEvent = { id: string; library_id: string; task_count: number; completed_task_count: number; phase: string; message: string; estimated_completion: string } export type JsonValue = null | boolean | number | string | JsonValue[] | { [key in string]: JsonValue } @@ -483,6 +481,18 @@ id: string; */ name: string; identity: RemoteIdentity; p2p: NodeConfigP2P; features: BackendFeature[]; preferences: NodePreferences; image_labeler_version: string | null }) & { data_path: string; device_model: string | null } +export type NonCriticalError = { indexer: NonCriticalIndexerError } | { file_identifier: NonCriticalFileIdentifierError } | { media_processor: NonCriticalMediaProcessorError } + +export type NonCriticalFileIdentifierError = { failed_to_extract_file_metadata: string } | { failed_to_extract_isolated_file_path_data: string } + +export type NonCriticalIndexerError = { failed_directory_entry: string } | { metadata: string } | { indexer_rule: string } | { file_path_metadata: string } | { fetch_already_existing_file_path_ids: string } | { fetch_file_paths_to_remove: string } | { iso_file_path: string } | { dispatch_keep_walking: string } | { missing_file_path_data: string } + +export type NonCriticalMediaDataExtractorError = { FailedToExtractImageMediaData: [string, string] } | { FilePathMissingObjectId: number } | { FailedToConstructIsolatedFilePathData: [number, string] } + +export type NonCriticalMediaProcessorError = { media_data_extractor: NonCriticalMediaDataExtractorError } | { thumbnailer: NonCriticalThumbnailerError } + +export type NonCriticalThumbnailerError = { MissingCasId: number } | { FailedToExtractIsolatedFilePathData: [number, string] } | { VideoThumbnailGenerationFailed: [string, string] } | { FormatImage: [string, string] } | { WebPEncoding: [string, string] } | { PanicWhileGeneratingThumbnail: [string, string] } | { CreateShardDirectory: string } | { SaveThumbnail: [string, string] } | { ThumbnailGenerationTimeout: string } + export type NonIndexedPathItem = { path: string; name: string; extension: string; kind: number; is_dir: boolean; date_created: string; date_modified: string; size_in_bytes_bytes: number[]; hidden: boolean } /** @@ -562,6 +572,14 @@ export type RenameMany = { from_pattern: FromPattern; to_pattern: string; from_f export type RenameOne = { from_file_path_id: number; to: string } +export type Report = { id: string; name: JobName; action: string | null; metadata: ReportMetadata[]; critical_error: string | null; non_critical_errors: NonCriticalError[]; created_at: string | null; started_at: string | null; completed_at: string | null; parent_id: string | null; status: 
Status; task_count: number; completed_task_count: number; phase: string; message: string; estimated_completion: string } + +export type ReportInputMetadata = { location: Location } | { sub_path: string } + +export type ReportMetadata = { input: ReportInputMetadata } | { output: ReportOutputMetadata } + +export type ReportOutputMetadata = { metrics: { [key in string]: JsonValue } } + export type RescanArgs = { location_id: number; sub_path: string } export type Resolution = { width: number; height: number } @@ -596,6 +614,8 @@ export type Statistics = { id: number; date_captured: string; total_object_count export type StatisticsResponse = { statistics: Statistics | null } +export type Status = "Queued" | "Running" | "Completed" | "Canceled" | "Failed" | "Paused" | "CompletedWithErrors" + export type Stream = { id: number; name: string | null; codec: Codec | null; aspect_ratio_num: number; aspect_ratio_den: number; frames_per_second_num: number; frames_per_second_den: number; time_base_real_den: number; time_base_real_num: number; dispositions: string[]; metadata: Metadata } export type SubtitleProps = { width: number; height: number } From 9ad3a90f946ad85fce143672154e52309a931f4a Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 14 May 2024 15:57:38 -0300 Subject: [PATCH 05/33] Getting active job reports from job system --- .../heavy-lifting/src/job_system/mod.rs | 16 +++++++++++ .../heavy-lifting/src/job_system/runner.rs | 28 ++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs index c1967876377a..f0f132fd8313 100644 --- a/core/crates/heavy-lifting/src/job_system/mod.rs +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -28,6 +28,7 @@ pub mod utils; pub use error::JobSystemError; use job::{IntoJob, Job, JobName, JobOutput, OuterContext}; +use report::Report; use runner::{run, JobSystemRunner, RunnerMessage}; use store::{load_jobs, StoredJobEntry}; @@ -123,6 +124,21 @@ impl> JobSystem HashMap { + let (ack_tx, ack_rx) = oneshot::channel(); + self.msgs_tx + .send(RunnerMessage::GetActiveReports { ack_tx }) + .await + .expect("runner msgs channel unexpectedly closed on get active reports request"); + + ack_rx + .await + .expect("ack channel closed before receiving get active reports response") + } + /// Checks if *any* of the desired jobs is running for the desired location /// # Panics /// Panics only happen if internal channels are unexpectedly closed diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index b2476f3a7877..e595dcd0d7eb 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -15,7 +15,10 @@ use std::{ use async_channel as chan; use chrono::Utc; use futures::StreamExt; -use futures_concurrency::{future::TryJoin, stream::Merge}; +use futures_concurrency::{ + future::{Join, TryJoin}, + stream::Merge, +}; use tokio::{ fs, sync::oneshot, @@ -56,6 +59,9 @@ pub(super) enum RunnerMessage>, }, + GetActiveReports { + ack_tx: oneshot::Sender>, + }, CheckIfJobAreRunning { job_names: Vec, location_id: location::id::Type, @@ -197,6 +203,17 @@ impl> JobSystemRunner HashMap { + self.handles + .iter() + .map(|(job_id, handle)| async { (*job_id, handle.ctx.report().await.clone()) }) + .collect::>() + .join() + .await + .into_iter() + .collect() + } + async fn process_command(&mut self, id: JobId, command: Command) -> 
Result<(), JobSystemError> { if let Some(handle) = self.handles.get_mut(&id) { handle.send_command(command).await?; @@ -251,7 +268,6 @@ impl> JobSystemRunner { - trace!("Job completed and will try to dispatch children jobs: "); try_dispatch_next_job( &mut handle, location_id, @@ -285,8 +301,7 @@ impl> JobSystemRunner: {e:#?}" ); }) @@ -512,6 +527,11 @@ pub(super) async fn run>( .expect("ack channel closed before sending new job response"); } + StreamMessage::RunnerMessage(RunnerMessage::GetActiveReports { ack_tx }) => { + ack_tx + .send(runner.get_active_reports().await) + .expect("ack channel closed before sending active reports response"); + } StreamMessage::RunnerMessage(RunnerMessage::ResumeStoredJob { id, location_id, From 1e4e23ea402b2d7778e294bd5a0d286239aeda85 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 14 May 2024 16:03:22 -0300 Subject: [PATCH 06/33] Using boxed opaque type to avoid a downcast issue with generics --- .../heavy-lifting/src/media_processor/job.rs | 25 +++++++++++-------- .../src/media_processor/shallow.rs | 16 +++++++----- core/src/location/non_indexed.rs | 9 ++++--- 3 files changed, 30 insertions(+), 20 deletions(-) diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 150cdff33931..a64e163102e9 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -38,11 +38,14 @@ use itertools::Itertools; use prisma_client_rust::{raw, PrismaValue}; use serde::{Deserialize, Serialize}; use serde_json::json; -use tracing::{debug, warn}; +use tracing::{debug, error, warn}; use super::{ helpers, - tasks::{self, media_data_extractor, thumbnailer}, + tasks::{ + self, media_data_extractor, + thumbnailer::{self, NewThumbnailReporter}, + }, NewThumbnailsReporter, BATCH_SIZE, }; @@ -105,7 +108,8 @@ impl Job for MediaProcessor { ctx: &impl JobContext, SerializedTasks(serialized_tasks): SerializedTasks, ) -> Result<(), Error> { - let reporter = NewThumbnailsReporter { ctx: ctx.clone() }; + let reporter: Arc = + Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); self.pending_tasks_on_resume = dispatcher .dispatch_many_boxed( @@ -113,7 +117,7 @@ impl Job for MediaProcessor { .map_err(media_processor::Error::from)? 
.into_iter() .map(|(task_kind, task_bytes)| { - let reporter = reporter.clone(); + let reporter = Arc::clone(&reporter); async move { match task_kind { TaskKind::MediaDataExtractor => { @@ -588,14 +592,14 @@ impl SerializableJob for MediaProcessor { .serialize() .await .map(|bytes| (TaskKind::MediaDataExtractor, bytes)) - } else if task.is::>>() { - task.downcast::>>() + } else if task.is::() { + task.downcast::() .expect("just checked") .serialize() .await .map(|bytes| (TaskKind::Thumbnailer, bytes)) } else { - unreachable!("Unexpected task type") + unreachable!("Unexpected task type: ") } }) .collect::>() @@ -815,7 +819,8 @@ async fn dispatch_thumbnailer_tasks( let location_id = parent_iso_file_path.location_id(); let library_id = ctx.id(); let db = ctx.db(); - let reporter = NewThumbnailsReporter { ctx: ctx.clone() }; + let reporter: Arc = + Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); let mut file_paths = get_all_children_files_by_extensions( db, @@ -847,7 +852,7 @@ async fn dispatch_thumbnailer_tasks( library_id, should_regenerate, false, - reporter.clone(), + Arc::clone(&reporter), ) }) .map(IntoTask::into_task) @@ -867,7 +872,7 @@ async fn dispatch_thumbnailer_tasks( library_id, should_regenerate, true, - reporter.clone(), + Arc::clone(&reporter), ) }) .map(IntoTask::into_task) diff --git a/core/crates/heavy-lifting/src/media_processor/shallow.rs b/core/crates/heavy-lifting/src/media_processor/shallow.rs index e9520173996f..7f93ed3ddd61 100644 --- a/core/crates/heavy-lifting/src/media_processor/shallow.rs +++ b/core/crates/heavy-lifting/src/media_processor/shallow.rs @@ -20,15 +20,18 @@ use std::{ sync::Arc, }; -use futures::StreamExt; -use futures_concurrency::future::{FutureGroup, TryJoin}; +use futures::{stream::FuturesUnordered, StreamExt}; +use futures_concurrency::future::TryJoin; use itertools::Itertools; use prisma_client_rust::{raw, PrismaValue}; use tracing::{debug, warn}; use super::{ helpers::{self, exif_media_data, ffmpeg_media_data, thumbnailer::THUMBNAIL_CACHE_DIR_NAME}, - tasks::{self, media_data_extractor, thumbnailer}, + tasks::{ + self, media_data_extractor, + thumbnailer::{self, NewThumbnailReporter}, + }, NewThumbnailsReporter, BATCH_SIZE, }; @@ -82,7 +85,7 @@ pub async fn shallow( .into_iter() .map(CancelTaskOnDrop::new), ) - .collect::>(); + .collect::>(); while let Some(res) = futures.next().await { match res { @@ -226,7 +229,8 @@ async fn dispatch_thumbnailer_tasks( let location_id = parent_iso_file_path.location_id(); let library_id = ctx.id(); let db = ctx.db(); - let reporter = NewThumbnailsReporter { ctx: ctx.clone() }; + let reporter: Arc = + Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); let file_paths = get_files_by_extensions( db, @@ -249,7 +253,7 @@ async fn dispatch_thumbnailer_tasks( library_id, should_regenerate, true, - reporter.clone(), + Arc::clone(&reporter), ) }) .map(IntoTask::into_task) diff --git a/core/src/location/non_indexed.rs b/core/src/location/non_indexed.rs index 88bbb91f1fe8..5cd432e5c83d 100644 --- a/core/src/location/non_indexed.rs +++ b/core/src/location/non_indexed.rs @@ -11,7 +11,8 @@ use crate::{ use sd_core_file_path_helper::{path_is_hidden, MetadataExt}; use sd_core_heavy_lifting::media_processor::{ - self, get_thumbnails_directory, GenerateThumbnailArgs, NewThumbnailsReporter, ThumbKey, + self, get_thumbnails_directory, thumbnailer::NewThumbnailReporter, GenerateThumbnailArgs, + NewThumbnailsReporter, ThumbKey, }; use sd_core_indexer_rules::{ seed::{NO_HIDDEN, NO_SYSTEM_FILES}, @@ -261,12 +262,12 @@ pub 
async fn walk( thumbnails_to_generate.extend(document_thumbnails_to_generate); let thumbnails_directory = Arc::new(get_thumbnails_directory(node.config.data_directory())); - let reporter = NewThumbnailsReporter { + let reporter: Arc = Arc::new(NewThumbnailsReporter { ctx: NodeContext { node: Arc::clone(&node), library: Arc::clone(&library), }, - }; + }); node.task_system .dispatch_many( @@ -278,7 +279,7 @@ pub async fn walk( media_processor::Thumbnailer::new_ephemeral( Arc::clone(&thumbnails_directory), chunk.collect(), - reporter.clone(), + Arc::clone(&reporter), ) }) .collect::>(), From 5da9014dbf05c287e606537c0dbc5228c720d36d Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 14 May 2024 16:04:10 -0300 Subject: [PATCH 07/33] Task system issues discovered on race conditions --- crates/task-system/src/task.rs | 57 ++++++++++---- crates/task-system/src/worker/runner.rs | 100 +++++++++++++++--------- 2 files changed, 103 insertions(+), 54 deletions(-) diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 486818c7dedd..1f9c3bcd93c1 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -15,7 +15,7 @@ use async_trait::async_trait; use chan::{Recv, RecvError}; use downcast_rs::{impl_downcast, Downcast}; use tokio::{spawn, sync::oneshot}; -use tracing::{trace, warn}; +use tracing::{error, trace, warn}; use uuid::Uuid; use super::{ @@ -353,7 +353,7 @@ impl TaskRemoteController { /// Will panic if the worker failed to ack the pause request pub async fn pause(&self) -> Result<(), SystemError> { let is_paused = self.worktable.is_paused.load(Ordering::Relaxed); - let is_canceled = self.worktable.is_canceled.load(Ordering::Relaxed); + let is_canceled = self.worktable.has_canceled.load(Ordering::Relaxed); let is_done = self.worktable.is_done.load(Ordering::Relaxed); trace!("Received pause command task: "); @@ -389,7 +389,7 @@ impl TaskRemoteController { /// /// Will panic if the worker failed to ack the cancel request pub async fn cancel(&self) { - let is_canceled = self.worktable.is_canceled.load(Ordering::Relaxed); + let is_canceled = self.worktable.has_canceled.load(Ordering::Relaxed); let is_done = self.worktable.is_done.load(Ordering::Relaxed); trace!("Received cancel command task: "); @@ -405,7 +405,7 @@ impl TaskRemoteController { rx.await.expect("Worker failed to ack cancel request"); } else { trace!("Task is not running, setting is_canceled flag"); - self.worktable.is_canceled.store(true, Ordering::Relaxed); + self.worktable.has_canceled.store(true, Ordering::Relaxed); self.system_comm .cancel_not_running_task( self.task_id, @@ -441,7 +441,10 @@ impl TaskRemoteController { /// Verify if the task was already completed #[must_use] pub fn is_done(&self) -> bool { - self.worktable.is_done.load(Ordering::Relaxed) + self.worktable.is_done() + | self.worktable.has_shutdown() + | self.worktable.has_aborted() + | self.worktable.has_canceled() } } @@ -531,7 +534,8 @@ impl Future for CancelTaskOnDrop { Poll::Pending => Poll::Pending, } } else { - Poll::Ready(Ok(TaskStatus::Canceled)) + error!("tried to poll an already completed CancelTaskOnDrop future"); + Poll::Pending } } } @@ -551,8 +555,9 @@ pub struct TaskWorktable { is_running: AtomicBool, is_done: AtomicBool, is_paused: AtomicBool, - is_canceled: AtomicBool, - is_aborted: AtomicBool, + has_canceled: AtomicBool, + has_aborted: AtomicBool, + has_shutdown: AtomicBool, interrupt_tx: chan::Sender, current_worker_id: AtomicWorkerId, } @@ -564,8 +569,9 @@ impl TaskWorktable { is_running: 
AtomicBool::new(false), is_done: AtomicBool::new(false), is_paused: AtomicBool::new(false), - is_canceled: AtomicBool::new(false), - is_aborted: AtomicBool::new(false), + has_canceled: AtomicBool::new(false), + has_aborted: AtomicBool::new(false), + has_shutdown: AtomicBool::new(false), interrupt_tx, current_worker_id: AtomicWorkerId::new(worker_id), } @@ -581,12 +587,23 @@ impl TaskWorktable { self.is_running.store(false, Ordering::Relaxed); } + pub fn set_canceled(&self) { + self.has_canceled.store(true, Ordering::Relaxed); + self.is_running.store(false, Ordering::Relaxed); + } + pub fn set_unpause(&self) { self.is_paused.store(false, Ordering::Relaxed); } pub fn set_aborted(&self) { - self.is_aborted.store(true, Ordering::Relaxed); + self.has_aborted.store(true, Ordering::Relaxed); + self.is_running.store(false, Ordering::Relaxed); + } + + pub fn set_shutdown(&self) { + self.has_shutdown.store(true, Ordering::Relaxed); + self.is_running.store(false, Ordering::Relaxed); } pub async fn pause(&self, tx: oneshot::Sender<()>) { @@ -605,7 +622,7 @@ impl TaskWorktable { } pub async fn cancel(&self, tx: oneshot::Sender<()>) { - self.is_canceled.store(true, Ordering::Relaxed); + self.has_canceled.store(true, Ordering::Relaxed); self.is_running.store(false, Ordering::Relaxed); self.interrupt_tx @@ -617,16 +634,24 @@ impl TaskWorktable { .expect("Worker channel closed trying to pause task"); } + pub fn is_done(&self) -> bool { + self.is_done.load(Ordering::Relaxed) + } + pub fn is_paused(&self) -> bool { self.is_paused.load(Ordering::Relaxed) } - pub fn is_canceled(&self) -> bool { - self.is_canceled.load(Ordering::Relaxed) + pub fn has_canceled(&self) -> bool { + self.has_canceled.load(Ordering::Relaxed) + } + + pub fn has_aborted(&self) -> bool { + self.has_aborted.load(Ordering::Relaxed) } - pub fn is_aborted(&self) -> bool { - self.is_aborted.load(Ordering::Relaxed) + pub fn has_shutdown(&self) -> bool { + self.has_shutdown.load(Ordering::Relaxed) } } diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index ac4788266b23..5e680ceba2a1 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -372,13 +372,12 @@ impl Runner { if let Some(suspended_task) = &self.suspended_task { if suspended_task.task.id() == task_id { trace!( - "Task is already suspended but will be paused: ", + "Task is already suspended but will be canceled: ", self.worker_id ); send_cancel_task_response( self.worker_id, - task_id, self.suspended_task.take().expect("we just checked it"), ); @@ -399,7 +398,6 @@ impl Runner { { send_cancel_task_response( self.worker_id, - task_id, self.priority_tasks .remove(index) .expect("we just checked it"), @@ -415,7 +413,6 @@ impl Runner { { send_cancel_task_response( self.worker_id, - task_id, self.tasks.remove(index).expect("we just checked it"), ); } @@ -607,13 +604,12 @@ impl Runner { if let Some(suspended_task) = &self.suspended_task { if suspended_task.task.id() == task_id { trace!( - "Task is already suspended but will be paused: ", + "Task is already suspended but will be force aborted: ", self.worker_id ); send_forced_abortion_task_response( self.worker_id, - task_id, self.suspended_task.take().expect("we just checked it"), ); @@ -628,7 +624,6 @@ impl Runner { { send_forced_abortion_task_response( self.worker_id, - task_id, self.priority_tasks .remove(index) .expect("we just checked it"), @@ -644,7 +639,6 @@ impl Runner { { send_forced_abortion_task_response( self.worker_id, - task_id, 
self.tasks.remove(index).expect("we just checked it"), ); @@ -672,6 +666,7 @@ impl Runner { let Self { worker_id, tasks, + suspended_task, paused_tasks, priority_tasks, is_idle, @@ -718,12 +713,13 @@ impl Runner { priority_tasks .into_iter() + .chain(suspended_task.into_iter()) .chain(paused_tasks.into_values()) .chain(tasks.into_iter()) .for_each(|task_work_state| { send_shutdown_task_response( worker_id, - task_work_state.task.id(), + &task_work_state.task.id(), task_work_state, ); }); @@ -750,19 +746,19 @@ impl Runner { status, }) => match status { InternalTaskExecStatus::Done(out) => { - send_complete_task_response(worker_id, task_id, task_work_state, out); + send_complete_task_response(worker_id, &task_id, task_work_state, out); } InternalTaskExecStatus::Canceled => { - send_cancel_task_response(worker_id, task_id, task_work_state); + send_cancel_task_response(worker_id, task_work_state); } InternalTaskExecStatus::Suspend | InternalTaskExecStatus::Paused => { - send_shutdown_task_response(worker_id, task_id, task_work_state); + send_shutdown_task_response(worker_id, &task_id, task_work_state); } InternalTaskExecStatus::Error(e) => { - send_error_task_response(worker_id, task_id, task_work_state, e); + send_error_task_response(worker_id, task_work_state, e); } }, Err(()) => { @@ -775,7 +771,7 @@ impl Runner { RunnerMessage::StoleTask(Some(task_work_state)) => { send_shutdown_task_response( worker_id, - task_work_state.task.id(), + &task_work_state.task.id(), task_work_state, ); } @@ -849,7 +845,13 @@ impl Runner { match kind { PendingTaskKind::Normal => self.tasks.push_front(task_work_state), PendingTaskKind::Priority => self.priority_tasks.push_front(task_work_state), - PendingTaskKind::Suspended => self.suspended_task = Some(task_work_state), + PendingTaskKind::Suspended => { + assert!( + self.suspended_task.is_none(), + "tried to suspend a task when we already have a suspended task on PendingTaskKind::Suspended" + ); + self.suspended_task = Some(task_work_state); + } } self.task_kinds.insert(id, kind); @@ -966,7 +968,7 @@ impl Runner { match status { InternalTaskExecStatus::Done(out) => { self.task_kinds.remove(&task_id); - send_complete_task_response(self.worker_id, task_id, task_work_state, out); + send_complete_task_response(self.worker_id, &task_id, task_work_state, out); } InternalTaskExecStatus::Paused => { @@ -979,15 +981,19 @@ impl Runner { InternalTaskExecStatus::Canceled => { self.task_kinds.remove(&task_id); - send_cancel_task_response(self.worker_id, task_id, task_work_state); + send_cancel_task_response(self.worker_id, task_work_state); } InternalTaskExecStatus::Error(e) => { self.task_kinds.remove(&task_id); - send_error_task_response(self.worker_id, task_id, task_work_state, e); + send_error_task_response(self.worker_id, task_work_state, e); } InternalTaskExecStatus::Suspend => { + assert!( + self.suspended_task.is_none(), + "tried to suspend a task when we already have a suspended task on InternalTaskExecStatus::Suspend" + ); self.suspended_task = Some(task_work_state); trace!( "Task suspended: ", @@ -1142,8 +1148,8 @@ fn handle_run_task_attempt( ) -> JoinHandle> { spawn({ let already_paused = worktable.is_paused(); - let already_canceled = worktable.is_canceled(); - let already_aborted = worktable.is_aborted(); + let already_canceled = worktable.has_canceled(); + let already_aborted = worktable.has_aborted(); async move { if already_paused { @@ -1414,14 +1420,14 @@ async fn run_single_task( fn send_complete_task_response( worker_id: WorkerId, - task_id: TaskId, + 
task_id: &TaskId, TaskWorkState { done_tx, worktable, .. }: TaskWorkState, out: TaskOutput, ) { worktable.set_completed(); - if done_tx.send(Ok(TaskStatus::Done((task_id, out)))).is_err() { + if done_tx.send(Ok(TaskStatus::Done((*task_id, out)))).is_err() { warn!( "Task done channel closed before sending done response for task: \ " @@ -1436,30 +1442,40 @@ fn send_complete_task_response( fn send_cancel_task_response( worker_id: WorkerId, - task_id: TaskId, TaskWorkState { - done_tx, worktable, .. + task, + done_tx, + worktable, + .. }: TaskWorkState, ) { - worktable.set_completed(); + worktable.set_canceled(); if done_tx.send(Ok(TaskStatus::Canceled)).is_err() { warn!( "Task done channel closed before sending canceled response for task: \ - ", + ", + task.id(), ); } else { trace!( "Emitted task canceled signal on cancel not running task: \ - ", + ", + task.id(), ); } } fn send_shutdown_task_response( worker_id: WorkerId, - task_id: TaskId, - TaskWorkState { task, done_tx, .. }: TaskWorkState, + task_id: &TaskId, + TaskWorkState { + task, + done_tx, + worktable, + .. + }: TaskWorkState, ) { + worktable.set_shutdown(); if done_tx.send(Ok(TaskStatus::Shutdown(task))).is_err() { warn!( "Task done channel closed before sending shutdown response for task: \ @@ -1475,9 +1491,11 @@ fn send_shutdown_task_response( fn send_error_task_response( worker_id: usize, - task_id: uuid::Uuid, TaskWorkState { - done_tx, worktable, .. + task, + done_tx, + worktable, + .. }: TaskWorkState, e: E, ) { @@ -1485,33 +1503,39 @@ fn send_error_task_response( if done_tx.send(Ok(TaskStatus::Error(e))).is_err() { warn!( "Task done channel closed before sending error response for task: \ - " + ", + task.id() ); } else { trace!( "Emitted task error signal on shutdown: \ - " + ", + task.id() ); } } fn send_forced_abortion_task_response( worker_id: WorkerId, - task_id: TaskId, TaskWorkState { - done_tx, worktable, .. + task, + done_tx, + worktable, + .. 
}: TaskWorkState, ) { - worktable.set_completed(); + worktable.set_aborted(); if done_tx.send(Ok(TaskStatus::ForcedAbortion)).is_err() { warn!( "Task done channel closed before sending forced abortion response for task: \ - ", + ", + task.id() ); } else { trace!( "Emitted task forced abortion signal on cancel not running task: \ - ", + ", + task.id() ); } } From 69688846a9f1ce43fb5fc84d0a28aff759b9bac6 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Wed, 15 May 2024 00:18:01 -0300 Subject: [PATCH 08/33] Enable debug --- .vscode/launch.json | 1 + Cargo.toml | 58 +++++++++++++++++++++++++++++---------------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 411d7f5a134d..df88b7c03af8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -11,6 +11,7 @@ "cargo": { "args": [ "build", + "--profile=dev-debug", "--manifest-path=./apps/desktop/src-tauri/Cargo.toml", "--no-default-features" ], diff --git a/Cargo.toml b/Cargo.toml index 38ebb83dbc74..78bebc482156 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,17 @@ [workspace] resolver = "2" members = [ - "core", - "core/crates/*", - "crates/*", - "apps/cli", - "apps/p2p-relay", - "apps/desktop/src-tauri", - "apps/desktop/crates/*", - "apps/mobile/modules/sd-core/core", - "apps/mobile/modules/sd-core/android/crate", - "apps/mobile/modules/sd-core/ios/crate", - "apps/server", + "core", + "core/crates/*", + "crates/*", + "apps/cli", + "apps/p2p-relay", + "apps/desktop/src-tauri", + "apps/desktop/crates/*", + "apps/mobile/modules/sd-core/core", + "apps/mobile/modules/sd-core/android/crate", + "apps/mobile/modules/sd-core/ios/crate", + "apps/server", ] [workspace.package] @@ -21,19 +21,19 @@ repository = "https://github.com/spacedriveapp/spacedrive" [workspace.dependencies] prisma-client-rust = { git = "https://github.com/brendonovich/prisma-client-rust", rev = "4f9ef9d38ca732162accff72b2eb684d2f120bab", features = [ - "migrations", - "specta", - "sqlite", - "sqlite-create-many", + "migrations", + "specta", + "sqlite", + "sqlite-create-many", ], default-features = false } prisma-client-rust-cli = { git = "https://github.com/brendonovich/prisma-client-rust", rev = "4f9ef9d38ca732162accff72b2eb684d2f120bab", features = [ - "migrations", - "specta", - "sqlite", - "sqlite-create-many", + "migrations", + "specta", + "sqlite", + "sqlite-create-many", ], default-features = false } prisma-client-rust-sdk = { git = "https://github.com/brendonovich/prisma-client-rust", rev = "4f9ef9d38ca732162accff72b2eb684d2f120bab", features = [ - "sqlite", + "sqlite", ], default-features = false } rspc = { version = "0.1.4" } @@ -115,6 +115,17 @@ lto = false codegen-units = 256 incremental = true +[profile.dev-debug] +inherits = "dev" +# Enables debugger +split-debuginfo = "none" +opt-level = 0 +debug = "full" +strip = "none" +lto = "off" +codegen-units = 256 +incremental = true + # Set the settings for build scripts and proc-macros. [profile.dev.build-override] opt-level = 3 @@ -124,6 +135,13 @@ opt-level = 3 opt-level = 3 incremental = false +# Set the default for dependencies, except workspace members. 
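The dev-debug profile added above inherits from dev but keeps full debug info in the binary (split-debuginfo = "none", strip = "none") and turns LTO off, which is what lets the updated launch.json attach a debugger to the desktop app. Outside the IDE, the same build can presumably be produced with the arguments the launch configuration already passes to cargo:

	cargo build --profile=dev-debug --manifest-path=./apps/desktop/src-tauri/Cargo.toml --no-default-features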
+[profile.dev-debug.package."*"] +inherits = "dev" +opt-level = 3 +debug = "full" +incremental = false + # Optimize release builds [profile.release] panic = "abort" # Strip expensive panic clean-up logic From bb6f6d66c2ab6772c2a290583f6892571dee3656 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Wed, 15 May 2024 00:24:21 -0300 Subject: [PATCH 09/33] Fix job report in the job manager --- apps/mobile/src/components/job/Job.tsx | 4 +- apps/mobile/src/components/job/JobGroup.tsx | 4 +- .../heavy-lifting/src/file_identifier/job.rs | 80 ++++---- core/crates/heavy-lifting/src/indexer/job.rs | 53 +++-- .../heavy-lifting/src/job_system/job.rs | 17 +- .../heavy-lifting/src/job_system/report.rs | 18 +- .../heavy-lifting/src/job_system/runner.rs | 102 ++++++---- .../helpers/ffmpeg_media_data.rs | 46 +---- .../heavy-lifting/src/media_processor/job.rs | 38 ++-- core/src/api/jobs.rs | 41 ++-- crates/media-metadata/src/ffmpeg/mod.rs | 41 +--- crates/utils/src/lib.rs | 56 ++++++ .../Layout/Sidebar/JobManager/Job.tsx | 9 +- .../Layout/Sidebar/JobManager/JobGroup.tsx | 4 +- interface/hooks/useIsLocationIndexing.ts | 11 +- interface/hooks/useRedirectToNewLocation.ts | 18 +- packages/client/src/core.ts | 6 +- packages/client/src/utils/index.ts | 7 +- packages/client/src/utils/jobs/index.ts | 19 +- .../src/utils/jobs/useGroupJobTimeText.tsx | 8 +- packages/client/src/utils/jobs/useJobInfo.tsx | 185 ++++++++++++------ 21 files changed, 464 insertions(+), 303 deletions(-) diff --git a/apps/mobile/src/components/job/Job.tsx b/apps/mobile/src/components/job/Job.tsx index e8f359266b8e..0f080faaf00c 100644 --- a/apps/mobile/src/components/job/Job.tsx +++ b/apps/mobile/src/components/job/Job.tsx @@ -10,14 +10,14 @@ import { } from 'phosphor-react-native'; import { memo } from 'react'; import { View, ViewStyle } from 'react-native'; -import { JobProgressEvent, JobReport, useJobInfo } from '@sd/client'; +import { JobProgressEvent, Report, useJobInfo } from '@sd/client'; import { tw } from '~/lib/tailwind'; import { ProgressBar } from '../animation/ProgressBar'; import JobContainer from './JobContainer'; type JobProps = { - job: JobReport; + job: Report; isChild?: boolean; containerStyle?: ViewStyle; progress: JobProgressEvent | null; diff --git a/apps/mobile/src/components/job/JobGroup.tsx b/apps/mobile/src/components/job/JobGroup.tsx index 811bcae85267..dbc4f882ffe8 100644 --- a/apps/mobile/src/components/job/JobGroup.tsx +++ b/apps/mobile/src/components/job/JobGroup.tsx @@ -9,7 +9,7 @@ import { getTotalTasks, JobGroup, JobProgressEvent, - JobReport, + Report, useLibraryMutation, useTotalElapsedTimeText } from '@sd/client'; @@ -158,7 +158,7 @@ const toastErrorSuccess = ( }; }; -function Options({ activeJob, group }: { activeJob?: JobReport; group: JobGroup }) { +function Options({ activeJob, group }: { activeJob?: Report; group: JobGroup }) { // const queryClient = useQueryClient(); const resumeJob = useLibraryMutation( diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index fd73854eca54..0cc967ca3cfc 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -19,7 +19,7 @@ use sd_task_system::{ AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId, TaskOutput, TaskStatus, }; -use sd_utils::db::maybe_missing; +use sd_utils::{db::maybe_missing, u64_to_frontend}; use std::{ collections::{HashMap, HashSet}, @@ -573,44 +573,46 @@ pub struct Metadata { 
completed_tasks: u64, } -impl From for ReportOutputMetadata { - fn from(value: Metadata) -> Self { - Self::Metrics(HashMap::from([ - ( - "extract_metadata_time".into(), - json!(value.extract_metadata_time), - ), - ( - "assign_cas_ids_time".into(), - json!(value.assign_cas_ids_time), - ), - ( - "fetch_existing_objects_time".into(), - json!(value.fetch_existing_objects_time), - ), - ( - "assign_to_existing_object_time".into(), - json!(value.assign_to_existing_object_time), - ), - ("create_object_time".into(), json!(value.create_object_time)), - ( - "seeking_orphans_time".into(), - json!(value.seeking_orphans_time), - ), - ( - "total_found_orphans".into(), - json!(value.total_found_orphans), - ), - ( - "created_objects_count".into(), - json!(value.created_objects_count), - ), - ( - "linked_objects_count".into(), - json!(value.linked_objects_count), - ), - ("total_tasks".into(), json!(value.completed_tasks)), - ])) +impl From for Vec { + fn from( + Metadata { + extract_metadata_time, + assign_cas_ids_time, + fetch_existing_objects_time, + assign_to_existing_object_time, + create_object_time, + seeking_orphans_time, + total_found_orphans, + created_objects_count, + linked_objects_count, + completed_tasks, + }: Metadata, + ) -> Self { + vec![ + ReportOutputMetadata::FileIdentifier { + total_orphan_paths: u64_to_frontend(total_found_orphans), + total_objects_created: u64_to_frontend(created_objects_count), + total_objects_linked: u64_to_frontend(linked_objects_count), + }, + ReportOutputMetadata::Metrics(HashMap::from([ + ("extract_metadata_time".into(), json!(extract_metadata_time)), + ("assign_cas_ids_time".into(), json!(assign_cas_ids_time)), + ( + "fetch_existing_objects_time".into(), + json!(fetch_existing_objects_time), + ), + ( + "assign_to_existing_object_time".into(), + json!(assign_to_existing_object_time), + ), + ("create_object_time".into(), json!(create_object_time)), + ("seeking_orphans_time".into(), json!(seeking_orphans_time)), + ("total_found_orphans".into(), json!(total_found_orphans)), + ("created_objects_count".into(), json!(created_objects_count)), + ("linked_objects_count".into(), json!(linked_objects_count)), + ("total_tasks".into(), json!(completed_tasks)), + ])), + ] } } diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index dd6a0d864666..ce27ed137332 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -21,7 +21,7 @@ use sd_task_system::{ AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId, TaskOutput, TaskStatus, }; -use sd_utils::db::maybe_missing; +use sd_utils::{db::maybe_missing, u64_to_frontend}; use std::{ collections::{HashMap, HashSet}, @@ -602,23 +602,40 @@ pub struct Metadata { removed_count: u64, } -impl From for ReportOutputMetadata { - fn from(value: Metadata) -> Self { - Self::Metrics(HashMap::from([ - ("db_write_time".into(), json!(value.db_write_time)), - ("scan_read_time".into(), json!(value.scan_read_time)), - ("total_tasks".into(), json!(value.total_tasks)), - ("total_paths".into(), json!(value.total_paths)), - ( - "total_updated_paths".into(), - json!(value.total_updated_paths), - ), - ("total_save_steps".into(), json!(value.total_save_steps)), - ("total_update_steps".into(), json!(value.total_update_steps)), - ("indexed_count".into(), json!(value.indexed_count)), - ("updated_count".into(), json!(value.updated_count)), - ("removed_count".into(), json!(value.removed_count)), - ])) +impl From for Vec { + fn 
from( + Metadata { + db_write_time, + scan_read_time, + total_tasks, + completed_tasks, + total_paths, + total_updated_paths, + total_save_steps, + total_update_steps, + indexed_count, + updated_count, + removed_count, + }: Metadata, + ) -> Self { + vec![ + ReportOutputMetadata::Indexer { + total_paths: u64_to_frontend(total_paths), + }, + ReportOutputMetadata::Metrics(HashMap::from([ + ("db_write_time".into(), json!(db_write_time)), + ("scan_read_time".into(), json!(scan_read_time)), + ("total_tasks".into(), json!(total_tasks)), + ("completed_tasks".into(), json!(completed_tasks)), + ("total_paths".into(), json!(total_paths)), + ("total_updated_paths".into(), json!(total_updated_paths)), + ("total_save_steps".into(), json!(total_save_steps)), + ("total_update_steps".into(), json!(total_update_steps)), + ("indexed_count".into(), json!(indexed_count)), + ("updated_count".into(), json!(updated_count)), + ("removed_count".into(), json!(removed_count)), + ])), + ] } } diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index e8d73996718d..0b019d786070 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -162,7 +162,7 @@ where #[derive(Debug)] pub struct JobReturn { data: JobOutputData, - metadata: Option, + metadata: Vec, non_critical_errors: Vec, } @@ -179,7 +179,7 @@ impl Default for JobReturn { fn default() -> Self { Self { data: JobOutputData::Empty, - metadata: None, + metadata: vec![], non_critical_errors: vec![], } } @@ -198,8 +198,8 @@ impl JobReturnBuilder { } #[must_use] - pub fn with_metadata(mut self, metadata: impl Into) -> Self { - self.job_return.metadata = Some(metadata.into()); + pub fn with_metadata(mut self, metadata: impl Into>) -> Self { + self.job_return.metadata = metadata.into(); self } @@ -251,9 +251,9 @@ impl JobOutput { ); } - if let Some(metadata) = metadata { - report.metadata.push(ReportMetadata::Output(metadata)); - } + report + .metadata + .extend(metadata.into_iter().map(ReportMetadata::Output)); report.completed_at = Some(Utc::now()); @@ -505,6 +505,7 @@ impl> JobHandle Result<(), JobSystemError> { trace!("Handle pausing job on shutdown: ", self.id); @@ -769,7 +770,7 @@ async fn to_spawn_job>( if let Err(e) = res { assert!(matches!(e, TaskSystemError::TaskNotFound(_))); - warn!("Tried to pause a task that was already completed"); + warn!("Tried to resume a task that was already completed"); } }); } diff --git a/core/crates/heavy-lifting/src/job_system/report.rs b/core/crates/heavy-lifting/src/job_system/report.rs index 7bc17e56c821..c4542564bb0b 100644 --- a/core/crates/heavy-lifting/src/job_system/report.rs +++ b/core/crates/heavy-lifting/src/job_system/report.rs @@ -53,6 +53,7 @@ impl From for rspc::Error { #[derive(Debug, Serialize, Deserialize, Type, Clone)] #[serde(rename_all = "snake_case")] +#[serde(tag = "type", content = "metadata")] pub enum ReportMetadata { Input(ReportInputMetadata), Output(ReportOutputMetadata), @@ -60,6 +61,7 @@ pub enum ReportMetadata { #[derive(Debug, Serialize, Deserialize, Type, Clone)] #[serde(rename_all = "snake_case")] +#[serde(tag = "type", content = "data")] pub enum ReportInputMetadata { // TODO: Add more variants as needed Location(location::Data), @@ -68,9 +70,23 @@ pub enum ReportInputMetadata { #[derive(Debug, Serialize, Deserialize, Type, Clone)] #[serde(rename_all = "snake_case")] +#[serde(tag = "type", content = "data")] pub enum ReportOutputMetadata { Metrics(HashMap), - // TODO: Add more 
variants as needed + Indexer { + total_paths: (u32, u32), + }, + FileIdentifier { + total_orphan_paths: (u32, u32), + total_objects_created: (u32, u32), + total_objects_linked: (u32, u32), + }, + MediaProcessor { + media_data_extracted: (u32, u32), + media_data_skipped: (u32, u32), + thumbnails_generated: (u32, u32), + thumbnails_skipped: (u32, u32), + }, } #[derive(Debug, Serialize, Type, Clone)] diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index e595dcd0d7eb..5e309f119683 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -165,6 +165,8 @@ impl> JobSystemRunner> JobSystemRunner) { + async fn process_return_status( + &mut self, + job_id: JobId, + status: Result, + ) -> Result<(), JobSystemError> { let Self { handles, worktables, @@ -275,7 +281,8 @@ impl> JobSystemRunner> JobSystemRunner { let name = handle.ctx.report().await.name; - let Ok(next_jobs) = handle - .next_jobs - .into_iter() - .map(|next_job| async move { - let next_id = next_job.id(); - let next_name = next_job.job_name(); - next_job - .serialize() - .await - .map(|maybe_serialized_job| { - maybe_serialized_job.map(|serialized_job| StoredJob { - id: next_id, - name: next_name, - serialized_job, - }) - }) - .map_err(|e| { - error!( - "Failed to serialize next job: : {e:#?}" - ); - }) - }) - .collect::>() - .try_join() - .await + let Some(next_jobs) = + serialize_next_jobs_to_shutdown(job_id, job_name, handle.next_jobs).await else { - return; + return Ok(()); }; worktables @@ -324,10 +307,10 @@ impl> JobSystemRunner { @@ -335,12 +318,12 @@ impl> JobSystemRunner" ); - return; + return Ok(()); } Ok(ReturnStatus::Shutdown(Err(e))) => { error!("Failed to serialize job: {e:#?}"); - return; + return Ok(()); } Ok(ReturnStatus::Canceled) => handle @@ -355,6 +338,8 @@ impl> JobSystemRunner> JobSystemRunner>( +async fn serialize_next_jobs_to_shutdown>( + parent_job_id: JobId, + parent_job_name: JobName, + next_jobs: impl IntoIterator>> + Send, +) -> Option> { + next_jobs + .into_iter() + .map(|next_job| async move { + let next_id = next_job.id(); + let next_name = next_job.job_name(); + next_job + .serialize() + .await + .map(|maybe_serialized_job| { + maybe_serialized_job.map(|serialized_job| StoredJob { + id: next_id, + name: next_name, + serialized_job, + }) + }) + .map_err(|e| { + error!( + "Failed to serialize next job: : {e:#?}" + ); + }) + }) + .collect::>() + .try_join() + .await + .map(|maybe_serialized_next_jobs| { + maybe_serialized_next_jobs.into_iter().flatten().collect() + }) + .ok() +} + +async fn try_dispatch_next_job>( handle: &mut JobHandle, location_id: location::id::Type, base_dispatcher: BaseTaskDispatcher, @@ -445,7 +466,7 @@ fn try_dispatch_next_job>( }: &mut JobsWorktables, handles: &mut HashMap>, job_return_status_tx: chan::Sender<(JobId, Result)>, -) { +) -> Result<(), JobSystemError> { if let Some(next) = handle.next_jobs.pop_front() { let next_id = next.id(); let next_hash = next.hash(); @@ -465,6 +486,7 @@ fn try_dispatch_next_job>( handle.ctx.get_outer_ctx(), job_return_status_tx, ); + next_handle.register_start(Utc::now()).await?; assert!( next_handle.next_jobs.is_empty(), @@ -481,6 +503,8 @@ fn try_dispatch_next_job>( } else { trace!("No next jobs to dispatch"); } + + Ok(()) } pub(super) async fn run>( @@ -511,7 +535,9 @@ pub(super) async fn run>( // Job return status messages StreamMessage::ReturnStatus((job_id, status)) => { trace!("Received return status for 
job: "); - runner.process_return_status(job_id, status).await; + if let Err(e) = runner.process_return_status(job_id, status).await { + error!("Failed to process return status: {e:#?}"); + } } // Runner messages @@ -565,7 +591,9 @@ pub(super) async fn run>( // Consuming all pending return status messages loop { while let Ok((job_id, status)) = job_return_status_rx_to_shutdown.try_recv() { - runner.process_return_status(job_id, status).await; + if let Err(e) = runner.process_return_status(job_id, status).await { + error!("Failed to process return status before shutting down: {e:#?}"); + } } if runner.is_empty() { diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs index da23092bc343..02ec1e38aaa3 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs @@ -20,7 +20,10 @@ use sd_prisma::prisma::{ ffmpeg_data, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_codec, ffmpeg_media_program, ffmpeg_media_stream, ffmpeg_media_video_props, object, PrismaClient, }; -use sd_utils::db::{ffmpeg_data_field_from_db, ffmpeg_data_field_to_db}; +use sd_utils::{ + db::{ffmpeg_data_field_from_db, ffmpeg_data_field_to_db}, + i64_to_frontend, +}; use std::{collections::HashMap, path::Path}; @@ -590,27 +593,10 @@ pub fn from_prisma_data( ) -> FFmpegMetadata { FFmpegMetadata { formats: formats.split(',').map(String::from).collect::>(), - duration: duration.map(|duration| { - let duration = ffmpeg_data_field_from_db(&duration); - - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let duration = ((duration >> 32) as i32, duration as u32); - duration - }), - start_time: start_time.map(|start_time| { - let start_time = ffmpeg_data_field_from_db(&start_time); - - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let start_time = ((start_time >> 32) as i32, start_time as u32); - start_time - }), - bit_rate: { - let bit_rate = ffmpeg_data_field_from_db(&bit_rate); - - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let bit_rate = ((bit_rate >> 32) as i32, bit_rate as u32); - bit_rate - }, + duration: duration.map(|duration| i64_to_frontend(ffmpeg_data_field_from_db(&duration))), + start_time: start_time + .map(|start_time| i64_to_frontend(ffmpeg_data_field_from_db(&start_time))), + bit_rate: i64_to_frontend(ffmpeg_data_field_from_db(&bit_rate)), chapters: chapters_from_prisma_data(chapters), programs: programs_from_prisma_data(programs), metadata: from_slice_option_to_option(metadata).unwrap_or_default(), @@ -632,20 +618,8 @@ fn chapters_from_prisma_data(chapters: Vec) -> Vec> 32) as i32, start as u32); - start - }, - end: { - let end = ffmpeg_data_field_from_db(&end); - - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let end = ((end >> 32) as i32, end as u32); - end - }, + start: i64_to_frontend(ffmpeg_data_field_from_db(&start)), + end: i64_to_frontend(ffmpeg_data_field_from_db(&end)), time_base_den, time_base_num, metadata: from_slice_option_to_option(metadata).unwrap_or_default(), diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index a64e163102e9..977c6fca3cd8 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -20,7 +20,7 @@ use sd_task_system::{ 
AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskOutput, TaskStatus, }; -use sd_utils::db::maybe_missing; +use sd_utils::{db::maybe_missing, u64_to_frontend}; use std::{ collections::HashMap, @@ -447,7 +447,7 @@ struct Metadata { thumbnailer_metrics_acc: ThumbnailerMetricsAccumulator, } -impl From for ReportOutputMetadata { +impl From for Vec { fn from( Metadata { media_data_metrics, @@ -456,19 +456,27 @@ impl From for ReportOutputMetadata { ) -> Self { let thumbnailer_metrics = ThumbnailerMetrics::from(thumbnailer_metrics_accumulator); - Self::Metrics(HashMap::from([ - // - // Media data extractor - // - ( - "media_data_extraction_metrics".into(), - json!(media_data_metrics), - ), - // - // Thumbnailer - // - ("thumbnailer_metrics".into(), json!(thumbnailer_metrics)), - ])) + vec![ + ReportOutputMetadata::MediaProcessor { + media_data_extracted: u64_to_frontend(media_data_metrics.extracted), + media_data_skipped: u64_to_frontend(media_data_metrics.skipped), + thumbnails_generated: u64_to_frontend(thumbnailer_metrics.generated), + thumbnails_skipped: u64_to_frontend(thumbnailer_metrics.skipped), + }, + ReportOutputMetadata::Metrics(HashMap::from([ + // + // Media data extractor + // + ( + "media_data_extraction_metrics".into(), + json!(media_data_metrics), + ), + // + // Thumbnailer + // + ("thumbnailer_metrics".into(), json!(thumbnailer_metrics)), + ])), + ] } } diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index fd0a9d32e1fe..516c67f778e6 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -3,13 +3,13 @@ use crate::{ invalidate_query, location::{find_location, LocationError}, object::validation::old_validator_job::OldObjectValidatorJobInit, - old_job::{JobReport, JobStatus, OldJob, OldJobs}, + old_job::{JobStatus, OldJob, OldJobs}, }; use sd_core_heavy_lifting::{ - file_identifier::FileIdentifier, media_processor::job::MediaProcessor, + file_identifier::FileIdentifier, job_system::report, media_processor::job::MediaProcessor, + JobId, Report, }; -use sd_core_prisma_helpers::job_without_data; use sd_prisma::prisma::{job, location, SortOrder}; @@ -31,6 +31,8 @@ use uuid::Uuid; use super::{utils::library, CoreEvent, Ctx, R}; +const TEN_MINUTES: Duration = Duration::from_secs(60 * 10); + pub(crate) fn mount() -> AlphaRouter { R.router() .procedure("progress", { @@ -42,7 +44,7 @@ pub(crate) fn mount() -> AlphaRouter { .subscription(|(node, _), _: ()| async move { let mut event_bus_rx = node.event_bus.0.subscribe(); // debounce per-job - let mut intervals = BTreeMap::::new(); + let mut intervals = BTreeMap::::new(); async_stream::stream! 
{ loop { @@ -63,6 +65,9 @@ pub(crate) fn mount() -> AlphaRouter { yield progress_event; *instant = Instant::now(); + + // remove stale jobs that didn't receive a progress for more than 10 minutes + intervals.retain(|_, instant| instant.elapsed() < TEN_MINUTES); } } }) @@ -74,44 +79,38 @@ pub(crate) fn mount() -> AlphaRouter { // this is to ensure the client will always get the correct initial state // - jobs are sorted in to groups by their action // - TODO: refactor grouping system to a many-to-many table - #[derive(Debug, Clone, Serialize, Deserialize, Type)] + #[derive(Debug, Clone, Serialize, Type)] pub struct JobGroup { id: Uuid, action: Option, - status: JobStatus, + status: report::Status, created_at: DateTime, - jobs: VecDeque, + jobs: VecDeque, } R.with2(library()) .query(|(node, library), _: ()| async move { let mut groups: HashMap = HashMap::new(); - let job_reports: Vec = library + let job_reports: Vec = library .db .job() .find_many(vec![]) .order_by(job::date_created::order(SortOrder::Desc)) .take(100) - .select(job_without_data::select()) .exec() .await? .into_iter() - .flat_map(JobReport::try_from) + .flat_map(Report::try_from) .collect(); - let active_reports_by_id = node.old_jobs.get_active_reports_with_id().await; + let active_reports_by_id = node.job_system.get_active_reports().await; for job in job_reports { // action name and group key are computed from the job data - let (action_name, group_key) = job.get_meta(); + let (action_name, group_key) = job.get_action_name_and_group_key(); - trace!( - "job {:#?}, action_name {}, group_key {:?}", - job, - action_name, - group_key - ); + trace!("job {job:#?}, action_name {action_name}, group_key {group_key:?}",); // if the job is running, use the in-memory report let report = active_reports_by_id.get(&job.id).unwrap_or(&job); @@ -123,7 +122,7 @@ pub(crate) fn mount() -> AlphaRouter { Entry::Vacant(entry) => { entry.insert(JobGroup { id: job.parent_id.unwrap_or(job.id), - action: Some(action_name.clone()), + action: Some(action_name), status: job.status, jobs: [report.clone()].into_iter().collect(), created_at: job.created_at.unwrap_or(Utc::now()), @@ -134,7 +133,7 @@ pub(crate) fn mount() -> AlphaRouter { let group = entry.get_mut(); // protect paused status from being overwritten - if report.status != JobStatus::Paused { + if report.status != report::Status::Paused { group.status = report.status; } @@ -159,6 +158,8 @@ pub(crate) fn mount() -> AlphaRouter { let mut groups_vec = groups.into_values().collect::>(); groups_vec.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + tracing::debug!("{groups_vec:#?}"); + Ok(groups_vec) }) }) diff --git a/crates/media-metadata/src/ffmpeg/mod.rs b/crates/media-metadata/src/ffmpeg/mod.rs index af7740413dc2..702762c862de 100644 --- a/crates/media-metadata/src/ffmpeg/mod.rs +++ b/crates/media-metadata/src/ffmpeg/mod.rs @@ -54,6 +54,7 @@ mod extract_data { FFmpegAudioProps, FFmpegChapter, FFmpegCodec, FFmpegMediaData, FFmpegMetadata, FFmpegProgram, FFmpegProps, FFmpegStream, FFmpegSubtitleProps, FFmpegVideoProps, }; + use sd_utils::i64_to_frontend; impl From for super::FFmpegMetadata { fn from( @@ -69,27 +70,9 @@ mod extract_data { ) -> Self { Self { formats, - duration: duration.map(|duration| { - #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] - { - // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation - ((duration >> 32) as i32, duration as u32) - } - }), - start_time: start_time.map(|start_time| { - #[allow(clippy::cast_sign_loss, 
clippy::cast_possible_truncation)] - { - // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation - ((start_time >> 32) as i32, start_time as u32) - } - }), - bit_rate: { - #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] - { - // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation - ((bit_rate >> 32) as i32, bit_rate as u32) - } - }, + duration: duration.map(i64_to_frontend), + start_time: start_time.map(i64_to_frontend), + bit_rate: i64_to_frontend(bit_rate), chapters: chapters.into_iter().map(Into::into).collect(), programs: programs.into_iter().map(Into::into).collect(), metadata: metadata.into(), @@ -117,20 +100,8 @@ mod extract_data { } }, // TODO: FIX these 2 when rspc/specta supports bigint - start: { - #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] - { - // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation - ((start >> 32) as i32, start as u32) - } - }, - end: { - #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] - { - // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation - ((end >> 32) as i32, end as u32) - } - }, + start: i64_to_frontend(start), + end: i64_to_frontend(end), time_base_num, time_base_den, metadata: metadata.into(), diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index d8e2236b7db7..1a910d15da18 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -1,3 +1,32 @@ +#![warn( + clippy::all, + clippy::pedantic, + clippy::correctness, + clippy::perf, + clippy::style, + clippy::suspicious, + clippy::complexity, + clippy::nursery, + clippy::unwrap_used, + unused_qualifications, + rust_2018_idioms, + trivial_casts, + trivial_numeric_casts, + unused_allocation, + clippy::unnecessary_cast, + clippy::cast_lossless, + clippy::cast_possible_truncation, + clippy::cast_possible_wrap, + clippy::cast_precision_loss, + clippy::cast_sign_loss, + clippy::dbg_macro, + clippy::deprecated_cfg_attr, + clippy::separated_literal_suffix, + deprecated +)] +#![forbid(deprecated_in_future)] +#![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] + use uuid::Uuid; pub mod db; @@ -17,11 +46,36 @@ pub fn chain_optional_iter( .collect() } +#[inline] +#[must_use] +pub const fn u64_to_frontend(num: u64) -> (u32, u32) { + #[allow(clippy::cast_possible_truncation)] + { + // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation + ((num >> 32) as u32, num as u32) + } +} + +#[inline] +#[must_use] +pub const fn i64_to_frontend(num: i64) -> (i32, u32) { + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + { + // SAFETY: We're splitting in (high, low) parts, so we're not going to lose data on truncation + ((num >> 32) as i32, num as u32) + } +} + +#[inline] #[must_use] pub fn uuid_to_bytes(uuid: Uuid) -> Vec { uuid.as_bytes().to_vec() } +/// Converts a byte slice to a `Uuid` +/// # Panics +/// Panics if the byte slice is not a valid `Uuid` which means we have a corrupted database +#[inline] #[must_use] pub fn from_bytes_to_uuid(bytes: &[u8]) -> Uuid { Uuid::from_slice(bytes).expect("corrupted uuid in database") @@ -43,6 +97,8 @@ macro_rules! msgpack { // Only used for testing purposes. Do not use in production code. 
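The u64_to_frontend and i64_to_frontend helpers added to sd-utils exist because rspc/specta does not yet support bigint, so 64-bit counters are shipped to the frontend as a (high, low) pair of 32-bit halves and reassembled there by uint32ArrayToBigInt further down in this patch. A minimal round-trip sketch, using only the helper defined above:

	// Illustrative only: split a u64 into the (high, low) pair that gets
	// serialized for the frontend, then reassemble it the same way the
	// TypeScript side does.
	let n: u64 = 5_000_000_000;
	let (high, low) = sd_utils::u64_to_frontend(n);
	assert_eq!((u64::from(high) << 32) | u64::from(low), n);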
use std::any::type_name; +#[inline] +#[must_use] pub fn test_type_of(_: T) -> &'static str { type_name::() } diff --git a/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx b/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx index 35553bb972cc..db1b29b3f2a1 100644 --- a/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx +++ b/interface/app/$libraryId/Layout/Sidebar/JobManager/Job.tsx @@ -9,7 +9,7 @@ import { Trash } from '@phosphor-icons/react'; import { memo } from 'react'; -import { JobProgressEvent, JobReport, useJobInfo } from '@sd/client'; +import { JobProgressEvent, Report, useJobInfo } from '@sd/client'; import { ProgressBar } from '@sd/ui'; import { showAlertDialog } from '~/components'; import { useLocale } from '~/hooks'; @@ -17,7 +17,7 @@ import { useLocale } from '~/hooks'; import JobContainer from './JobContainer'; interface JobProps { - job: JobReport; + job: Report; className?: string; isChild?: boolean; progress: JobProgressEvent | null; @@ -41,12 +41,13 @@ function Job({ job, className, isChild, progress }: JobProps) { if (job.status === 'CompletedWithErrors') { const JobError = (
-				{job.errors_text.map((error, i) => (
+				{job.non_critical_errors.map((error, i) => (
 					

-									{error.trim()}
+									{/* TODO: Report errors in a nicer way */}
+									{JSON.stringify(error)}

))}
diff --git a/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx b/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx index d579cef4ada6..69e7ea1782dd 100644 --- a/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx +++ b/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx @@ -10,7 +10,7 @@ import { getTotalTasks, JobGroup, JobProgressEvent, - JobReport, + Report, useLibraryMutation, useTotalElapsedTimeText } from '@sd/client'; @@ -153,7 +153,7 @@ function Options({ setShowChildJobs, showChildJobs }: { - activeJob?: JobReport; + activeJob?: Report; group: JobGroup; setShowChildJobs: () => void; showChildJobs: boolean; diff --git a/interface/hooks/useIsLocationIndexing.ts b/interface/hooks/useIsLocationIndexing.ts index 70308895a8bb..c77604e5ed88 100644 --- a/interface/hooks/useIsLocationIndexing.ts +++ b/interface/hooks/useIsLocationIndexing.ts @@ -14,9 +14,16 @@ export const useIsLocationIndexing = (locationId: number): boolean => { const isLocationIndexing = jobGroups?.some((group) => group.jobs.some((job) => { + let jobLocationId: number | undefined; + for (const metadata of job.metadata) { + if (metadata.type === 'input' && metadata.metadata.type === 'location') { + jobLocationId = metadata.metadata.data.id; + break; + } + } if ( - job.name === 'indexer' && - (job.metadata as any)?.location.id === locationId && + job.name === 'Indexer' && + jobLocationId === locationId && (job.status === 'Running' || job.status === 'Queued') ) { return job.completed_task_count === 0; diff --git a/interface/hooks/useRedirectToNewLocation.ts b/interface/hooks/useRedirectToNewLocation.ts index c76f5b324de8..8b1ba8479759 100644 --- a/interface/hooks/useRedirectToNewLocation.ts +++ b/interface/hooks/useRedirectToNewLocation.ts @@ -23,12 +23,18 @@ export const useRedirectToNewLocation = () => { const hasIndexerJob = jobGroups ?.flatMap((j) => j.jobs) - .some( - (j) => - j.name === 'indexer' && - (j.metadata as any)?.location.id === newLocation && - (j.completed_task_count > 0 || j.completed_at != null) - ); + .some((j) => { + let locationId: number | undefined; + for (const metadata of j.metadata) { + if (metadata.type === 'input' && metadata.metadata.type === 'location') { + locationId = metadata.metadata.data.id; + break; + } + } + j.name === 'Indexer' && + locationId === newLocation && + (j.completed_task_count > 0 || j.completed_at != null); + }); useEffect(() => { if (hasIndexerJob) { diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 232d1397d541..363fee3ce52e 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -574,11 +574,11 @@ export type RenameOne = { from_file_path_id: number; to: string } export type Report = { id: string; name: JobName; action: string | null; metadata: ReportMetadata[]; critical_error: string | null; non_critical_errors: NonCriticalError[]; created_at: string | null; started_at: string | null; completed_at: string | null; parent_id: string | null; status: Status; task_count: number; completed_task_count: number; phase: string; message: string; estimated_completion: string } -export type ReportInputMetadata = { location: Location } | { sub_path: string } +export type ReportInputMetadata = { type: "location"; data: Location } | { type: "sub_path"; data: string } -export type ReportMetadata = { input: ReportInputMetadata } | { output: ReportOutputMetadata } +export type ReportMetadata = { type: "input"; metadata: ReportInputMetadata } | { type: "output"; metadata: 
ReportOutputMetadata } -export type ReportOutputMetadata = { metrics: { [key in string]: JsonValue } } +export type ReportOutputMetadata = { type: "metrics"; data: { [key in string]: JsonValue } } | { type: "indexer"; data: { total_paths: [number, number] } } | { type: "file_identifier"; data: { total_orphan_paths: [number, number]; total_objects_created: [number, number]; total_objects_linked: [number, number] } } | { type: "media_processor"; data: { media_data_extracted: [number, number]; media_data_skipped: [number, number]; thumbnails_generated: [number, number]; thumbnails_skipped: [number, number] } } export type RescanArgs = { location_id: number; sub_path: string } diff --git a/packages/client/src/utils/index.ts b/packages/client/src/utils/index.ts index 7ad20f405be8..8e536e7c4acc 100644 --- a/packages/client/src/utils/index.ts +++ b/packages/client/src/utils/index.ts @@ -123,7 +123,7 @@ export type UnionToTuple = ? [...UnionToTuple>, W] : []; -export function formatNumber(n: number) { +export function formatNumber(n: number | bigint) { if (!n) return '0'; return Intl.NumberFormat().format(n); } @@ -142,6 +142,11 @@ export function int32ArrayToBigInt([high, low]: [number, number]) { return (BigInt(high | 0) << 32n) | BigInt(low >>> 0); } +export function uint32ArrayToBigInt([high, low]: [number, number]) { + // Note: These magic shift operations internally convert high into u32 and low into u32 + return (BigInt(high >>> 0) << 32n) | BigInt(low >>> 0); +} + export function capitalize(string: T): Capitalize { return (string.charAt(0).toUpperCase() + string.slice(1)) as Capitalize; } diff --git a/packages/client/src/utils/jobs/index.ts b/packages/client/src/utils/jobs/index.ts index b5429dbdf17e..048bec569892 100644 --- a/packages/client/src/utils/jobs/index.ts +++ b/packages/client/src/utils/jobs/index.ts @@ -1,11 +1,11 @@ import { ForwardRefExoticComponent } from 'react'; -import { JobReport } from '../../core'; +import { Report } from '../../core'; +export * from './context'; export * from './useGroupJobTimeText'; export * from './useJobInfo'; export * from './useJobProgress'; -export * from './context'; // NOTE: This type is also used on mobile except for the tooltip, Be careful when changing it. @@ -19,7 +19,7 @@ export interface TextItem { // first array for lines, second array for items separated by " • ". export type TextItems = (TextItem | undefined)[][]; -export function getTotalTasks(jobs: JobReport[]) { +export function getTotalTasks(jobs: Report[]) { const tasks = { completed: 0, total: 0, timeOfLastFinishedJob: '' }; jobs?.forEach(({ task_count, status, completed_at, completed_task_count }) => { @@ -33,8 +33,17 @@ export function getTotalTasks(jobs: JobReport[]) { return tasks; } -export function getJobNiceActionName(action: string, completed: boolean, job?: JobReport) { - const name = (job?.metadata?.location as any)?.name || 'Unknown'; +export function getJobNiceActionName(action: string, completed: boolean, job?: Report) { + let name = 'Unknown'; + if (job != null) { + for (const metadata of job.metadata) { + if (metadata.type === 'input' && metadata.metadata.type === 'location') { + name = metadata.metadata.data.name ?? name; + break; + } + } + } + switch (action) { case 'scan_location': return completed ? 
`Added location "${name}"` : `Adding location "${name}"`; diff --git a/packages/client/src/utils/jobs/useGroupJobTimeText.tsx b/packages/client/src/utils/jobs/useGroupJobTimeText.tsx index f725416e87e1..37672c57f2eb 100644 --- a/packages/client/src/utils/jobs/useGroupJobTimeText.tsx +++ b/packages/client/src/utils/jobs/useGroupJobTimeText.tsx @@ -2,20 +2,20 @@ import dayjs from 'dayjs'; import duration from 'dayjs/plugin/duration'; import { useEffect, useMemo } from 'react'; -import { JobReport } from '../../core'; +import { Report } from '../../core'; import { useForceUpdate } from '../../hooks'; dayjs.extend(duration); // TODO: refactor this, its a mess. -export function useTotalElapsedTimeText(jobs: JobReport[] = []) { +export function useTotalElapsedTimeText(jobs: Report[] = []) { const forceUpdate = useForceUpdate(); const elapsedTimeText = useMemo(() => { let total = 0; let text: string | null = ''; - const groupedJobs = jobs.reduce((acc: Record, job) => { + const groupedJobs = jobs.reduce((acc: Record, job) => { const parentId = String(job.parent_id); if (!acc[parentId]) { acc[parentId] = []; @@ -24,7 +24,7 @@ export function useTotalElapsedTimeText(jobs: JobReport[] = []) { return acc; }, {}); - Object.values(groupedJobs).forEach((group: JobReport[]) => { + Object.values(groupedJobs).forEach((group: Report[]) => { let groupTotal = 0; group.forEach((job) => { const start = dayjs(job.started_at); diff --git a/packages/client/src/utils/jobs/useJobInfo.tsx b/packages/client/src/utils/jobs/useJobInfo.tsx index a321d0eea6ec..91544350f590 100644 --- a/packages/client/src/utils/jobs/useJobInfo.tsx +++ b/packages/client/src/utils/jobs/useJobInfo.tsx @@ -1,6 +1,6 @@ import { TextItems } from '.'; -import { formatNumber } from '../..'; -import { JobProgressEvent, JobReport } from '../../core'; +import { formatNumber, uint32ArrayToBigInt } from '../..'; +import { JobProgressEvent, Report, ReportOutputMetadata } from '../../core'; interface JobNiceData { name: string; @@ -9,39 +9,55 @@ interface JobNiceData { isRunning: boolean; isQueued: boolean; isPaused: boolean; - indexedPath: any; + indexedPath?: any; taskCount: number; completedTaskCount: number; meta: any; output: any; } -export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | null): JobNiceData { +export function useJobInfo(job: Report, realtimeUpdate: JobProgressEvent | null): JobNiceData { const isRunning = job.status === 'Running', isQueued = job.status === 'Queued', isPaused = job.status === 'Paused', - indexedPath = (job.metadata?.data as any)?.location.path, taskCount = realtimeUpdate?.task_count || job.task_count, completedTaskCount = realtimeUpdate?.completed_task_count || job.completed_task_count, - phase = realtimeUpdate?.phase, - meta = job.metadata, - output = (meta?.output as any)?.run_metadata; + phase = realtimeUpdate?.phase; + + const output: ReportOutputMetadata[] = []; + let indexedPath: string | undefined; + for (const metadata of job.metadata) { + if (metadata.type === 'output') { + output.push(metadata.metadata); + } + + if (metadata.type === 'input' && metadata.metadata.type === 'sub_path') { + indexedPath = metadata.metadata.data; + } + } const data = { isRunning, isQueued, isPaused, - indexedPath, taskCount, completedTaskCount, - meta, + meta: job.metadata, output }; switch (job.name) { - case 'indexer': + case 'Indexer': { + let totalPaths = 0n; + for (const metadata of output) { + if (metadata.type === 'indexer') { + totalPaths = uint32ArrayToBigInt(metadata.data.total_paths); + } + } + 
return { ...data, + indexedPath, name: `${isQueued ? 'Index' : isRunning ? 'Indexing' : 'Indexed'} files ${ indexedPath ? `at ${indexedPath}` : `` }`, @@ -52,16 +68,40 @@ export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | nu ? job.message : isRunning && realtimeUpdate?.message ? realtimeUpdate.message - : `${formatNumber(output?.total_paths)} ${plural( - output?.total_paths, + : `${formatNumber(totalPaths)} ${plural( + totalPaths, 'path' )} discovered` } ] ] }; - case 'media_processor': { + } + case 'MediaProcessor': { const generateTexts = () => { + const parsedOutput = { + mediaDataExtracted: 0n, + mediaDataSkipped: 0n, + thumbnailsGenerated: 0n, + thumbnailsSkipped: 0n + }; + for (const metadata of output) { + if (metadata.type === 'media_processor') { + const { + media_data_extracted, + media_data_skipped, + thumbnails_generated, + thumbnails_skipped + } = metadata.data; + + parsedOutput.mediaDataExtracted = uint32ArrayToBigInt(media_data_extracted); + parsedOutput.mediaDataSkipped = uint32ArrayToBigInt(media_data_skipped); + parsedOutput.thumbnailsGenerated = + uint32ArrayToBigInt(thumbnails_generated); + parsedOutput.thumbnailsSkipped = uint32ArrayToBigInt(thumbnails_skipped); + } + } + switch (phase) { case 'media_data': { return [ @@ -69,7 +109,7 @@ export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | nu text: `${ completedTaskCount ? formatNumber(completedTaskCount || 0) - : formatNumber(output?.exif_data?.extracted) + : formatNumber(parsedOutput.mediaDataExtracted) } of ${formatNumber(taskCount)} ${plural( taskCount, 'media file' @@ -84,7 +124,7 @@ export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | nu text: `${ completedTaskCount ? formatNumber(completedTaskCount || 0) - : formatNumber(output?.thumbs_processed) + : formatNumber(parsedOutput.thumbnailsGenerated) } of ${formatNumber(taskCount)} ${plural( taskCount, 'thumbnail' @@ -108,11 +148,12 @@ export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | nu default: { // If we don't have a phase set, then we're done - const totalThumbs = output?.thumbs_processed || 0; + const totalThumbs = + parsedOutput.thumbnailsGenerated + parsedOutput.thumbnailsSkipped; const totalMediaFiles = - output?.exif_data?.extracted || 0 + output?.exif_data?.skipped || 0; + parsedOutput.mediaDataExtracted + parsedOutput.mediaDataSkipped; - return totalThumbs === 0 && totalMediaFiles === 0 + return totalThumbs === 0n && totalMediaFiles === 0n ? [{ text: 'None processed' }] : [ { @@ -141,70 +182,89 @@ export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | nu }; } - case 'file_identifier': + case 'FileIdentifier': { + const parsedOutput = { + totalOrphanPaths: 0n, + totalObjectsCreated: 0n, + totalObjectsLinked: 0n + }; + for (const metadata of output) { + if (metadata.type === 'file_identifier') { + const { total_orphan_paths, total_objects_created, total_objects_linked } = + metadata.data; + + parsedOutput.totalOrphanPaths = uint32ArrayToBigInt(total_orphan_paths); + parsedOutput.totalObjectsCreated = uint32ArrayToBigInt(total_objects_created); + parsedOutput.totalObjectsLinked = uint32ArrayToBigInt(total_objects_linked); + } + } + return { ...data, name: `${isQueued ? 'Extract' : isRunning ? 'Extracting' : 'Extracted'} metadata`, textItems: [ !isRunning - ? output?.total_orphan_paths === 0 + ? parsedOutput.totalOrphanPaths === 0n ? 
[{ text: 'No files changed' }] : [ { - text: `${formatNumber(output?.total_orphan_paths)} ${plural( - output?.total_orphan_paths, + text: `${formatNumber(parsedOutput.totalOrphanPaths)} ${plural( + parsedOutput.totalOrphanPaths, 'file' )}` }, { text: `${formatNumber( - output?.total_objects_created + parsedOutput.totalObjectsCreated )} ${plural( - output?.total_objects_created, + parsedOutput.totalObjectsCreated, 'Object' )} created` }, { text: `${formatNumber( - output?.total_objects_linked - )} ${plural(output?.total_objects_linked, 'Object')} linked` + parsedOutput.totalObjectsLinked + )} ${plural(parsedOutput.totalObjectsLinked, 'Object')} linked` } ] : [{ text: addCommasToNumbersInMessage(realtimeUpdate?.message) }] ] }; - case 'file_copier': - return { - ...data, - name: `${isQueued ? 'Copy' : isRunning ? 'Copying' : 'Copied'} ${ - isRunning ? completedTaskCount + 1 : completedTaskCount - } ${isRunning ? `of ${job.task_count}` : ``} ${plural(job.task_count, 'file')}`, - textItems: [[{ text: job.status }]] - }; - case 'file_deleter': - return { - ...data, - name: `${ - isQueued ? 'Delete' : isRunning ? 'Deleting' : 'Deleted' - } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, - textItems: [[{ text: job.status }]] - }; - case 'file_cutter': - return { - ...data, - name: `${ - isQueued ? 'Cut' : isRunning ? 'Cutting' : 'Cut' - } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, - textItems: [[{ text: job.status }]] - }; - case 'object_validator': - return { - ...data, - name: `${isQueued ? 'Validate' : isRunning ? 'Validating' : 'Validated'} ${ - !isQueued ? completedTaskCount : '' - } ${plural(completedTaskCount, 'object')}`, - textItems: [[{ text: job.status }]] - }; + } + + // TODO(fogodev): put these back in when they're implemented + // case 'file_copier': + // return { + // ...data, + // name: `${isQueued ? 'Copy' : isRunning ? 'Copying' : 'Copied'} ${ + // isRunning ? completedTaskCount + 1 : completedTaskCount + // } ${isRunning ? `of ${job.task_count}` : ``} ${plural(job.task_count, 'file')}`, + // textItems: [[{ text: job.status }]] + // }; + // case 'file_deleter': + // return { + // ...data, + // name: `${ + // isQueued ? 'Delete' : isRunning ? 'Deleting' : 'Deleted' + // } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, + // textItems: [[{ text: job.status }]] + // }; + // case 'file_cutter': + // return { + // ...data, + // name: `${ + // isQueued ? 'Cut' : isRunning ? 'Cutting' : 'Cut' + // } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, + // textItems: [[{ text: job.status }]] + // }; + // case 'object_validator': + // return { + // ...data, + // name: `${isQueued ? 'Validate' : isRunning ? 'Validating' : 'Validated'} ${ + // !isQueued ? 
completedTaskCount : '' + // } ${plural(completedTaskCount, 'object')}`, + // textItems: [[{ text: job.status }]] + // }; default: return { ...data, @@ -214,10 +274,9 @@ export function useJobInfo(job: JobReport, realtimeUpdate: JobProgressEvent | nu } } -function plural(count: number, name?: string) { - if (count === 1) { - return name || ''; - } +function plural(count: number | bigint, name?: string) { + if (count === 1 || count === 1n) return name || ''; + return `${name || ''}s`; } From 548b671395fc6513e007d6c4c19659e5802ad34b Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Wed, 15 May 2024 23:45:42 -0300 Subject: [PATCH 10/33] Fix race condition on steal tasks --- crates/task-system/src/message.rs | 17 ++- crates/task-system/src/task.rs | 37 ++++- crates/task-system/src/worker/mod.rs | 59 ++++---- crates/task-system/src/worker/run.rs | 38 +++-- crates/task-system/src/worker/runner.rs | 175 +++++++++++++++--------- crates/utils/src/db.rs | 42 ++++-- 6 files changed, 241 insertions(+), 127 deletions(-) diff --git a/crates/task-system/src/message.rs b/crates/task-system/src/message.rs index f6f8265c7ef2..964131db76b4 100644 --- a/crates/task-system/src/message.rs +++ b/crates/task-system/src/message.rs @@ -1,8 +1,9 @@ +use async_channel as chan; use tokio::sync::oneshot; use super::{ error::{RunError, SystemError}, - task::{TaskId, TaskWorkState}, + task::{InternalTaskExecStatus, TaskId, TaskWorkState}, worker::WorkerId, }; @@ -58,6 +59,18 @@ pub enum WorkerMessage { ack: oneshot::Sender>, }, ShutdownRequest(oneshot::Sender<()>), - StealRequest(oneshot::Sender>>), + StealRequest { + ack: oneshot::Sender, + stolen_task_tx: chan::Sender>>, + }, WakeUp, } + +pub struct TaskRunnerOutput { + pub task_work_state: TaskWorkState, + pub status: InternalTaskExecStatus, +} + +pub struct TaskOutputMessage(pub TaskId, pub Result, ()>); + +pub struct StoleTaskMessage(pub TaskWorkState); diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 1f9c3bcd93c1..eb35d7049c80 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -12,7 +12,6 @@ use std::{ use async_channel as chan; use async_trait::async_trait; -use chan::{Recv, RecvError}; use downcast_rs::{impl_downcast, Downcast}; use tokio::{spawn, sync::oneshot}; use tracing::{error, trace, warn}; @@ -181,7 +180,7 @@ where #[pin_project::pin_project] pub struct InterrupterFuture<'recv> { #[pin] - fut: Recv<'recv, InterruptionRequest>, + fut: chan::Recv<'recv, InterruptionRequest>, has_interrupted: &'recv AtomicU8, } @@ -199,7 +198,7 @@ impl Future for InterrupterFuture<'_> { this.has_interrupted.store(kind as u8, Ordering::Relaxed); Poll::Ready(kind) } - Poll::Ready(Err(RecvError)) => { + Poll::Ready(Err(chan::RecvError)) => { // In case the task handle was dropped, we can't receive any more interrupt messages // so we will never interrupt and the task will run freely until ended warn!("Task interrupter channel closed, will run task until it finishes!"); @@ -659,7 +658,7 @@ impl TaskWorktable { pub struct TaskWorkState { pub(crate) task: Box>, pub(crate) worktable: Arc, - pub(crate) done_tx: oneshot::Sender, SystemError>>, + pub(crate) done_tx: PanicOnSenderDrop, pub(crate) interrupter: Arc, } @@ -670,3 +669,33 @@ impl TaskWorkState { .store(new_worker_id, Ordering::Relaxed); } } + +#[derive(Debug)] +pub struct PanicOnSenderDrop( + Option, SystemError>>>, +); + +impl PanicOnSenderDrop { + pub fn new(done_tx: oneshot::Sender, SystemError>>) -> Self { + Self(Some(done_tx)) + } + + pub fn send( + 
mut self, + res: Result, SystemError>, + ) -> Result<(), Result, SystemError>> { + self.0 + .take() + .expect("tried to send a task output twice to the same task handle") + .send(res) + } +} + +impl Drop for PanicOnSenderDrop { + fn drop(&mut self) { + assert!( + self.0.is_none(), + "TaskHandle done channel dropped before sending a result" + ); + } +} diff --git a/crates/task-system/src/worker/mod.rs b/crates/task-system/src/worker/mod.rs index cdeae4ddc4a3..7ba9317d3e51 100644 --- a/crates/task-system/src/worker/mod.rs +++ b/crates/task-system/src/worker/mod.rs @@ -8,14 +8,13 @@ use async_channel as chan; use tokio::{spawn, sync::oneshot, task::JoinHandle}; use tracing::{error, info, trace, warn}; -use crate::task::TaskRemoteController; - use super::{ error::{RunError, SystemError}, - message::WorkerMessage, + message::{StoleTaskMessage, TaskRunnerOutput, WorkerMessage}, system::SystemComm, task::{ - InternalTaskExecStatus, Interrupter, Task, TaskHandle, TaskId, TaskWorkState, TaskWorktable, + Interrupter, PanicOnSenderDrop, Task, TaskHandle, TaskId, TaskRemoteController, + TaskWorkState, TaskWorktable, }, }; @@ -123,7 +122,7 @@ impl Worker { task: new_task, worktable: Arc::clone(&worktable), interrupter: Arc::new(Interrupter::new(interrupt_rx)), - done_tx, + done_tx: PanicOnSenderDrop::new(done_tx), })) .await .expect("Worker channel closed trying to add task"); @@ -235,26 +234,22 @@ pub struct WorkerComm { } impl WorkerComm { - pub async fn steal_task(&self, worker_id: WorkerId) -> Option> { + pub async fn steal_task( + &self, + stolen_task_tx: chan::Sender>>, + ) -> bool { let (tx, rx) = oneshot::channel(); self.msgs_tx - .send(WorkerMessage::StealRequest(tx)) + .send(WorkerMessage::StealRequest { + ack: tx, + stolen_task_tx, + }) .await .expect("Worker channel closed trying to steal task"); rx.await .expect("Worker channel closed trying to steal task") - .map(|task_work_state| { - trace!( - "Worker stole task: \ - ", - self.worker_id, - task_work_state.task.id() - ); - task_work_state.change_worker(worker_id); - task_work_state - }) } } @@ -277,7 +272,11 @@ impl WorkStealer { } } - pub async fn steal(&self, worker_id: WorkerId) -> Option> { + pub async fn steal( + &self, + worker_id: WorkerId, + stolen_task_tx: &chan::Sender>>, + ) { let total_workers = self.worker_comms.len(); for worker_comm in self @@ -297,8 +296,12 @@ impl WorkStealer { worker_comm.worker_id ); - if let Some(task) = worker_comm.steal_task(worker_id).await { - return Some(task); + if worker_comm.steal_task(stolen_task_tx.clone()).await { + trace!( + "Worker successfully stole a task", + worker_comm.worker_id + ); + return; } trace!( @@ -307,20 +310,14 @@ impl WorkStealer { ); } - None + trace!("No workers have tasks to steal"); + stolen_task_tx + .send(None) + .await + .expect("Stolen task channel closed"); } pub fn workers_count(&self) -> usize { self.worker_comms.len() } } - -struct TaskRunnerOutput { - task_work_state: TaskWorkState, - status: InternalTaskExecStatus, -} - -enum RunnerMessage { - TaskOutput(TaskId, Result, ()>), - StoleTask(Option>), -} diff --git a/crates/task-system/src/worker/run.rs b/crates/task-system/src/worker/run.rs index 70de8c65c720..b9910c8496c2 100644 --- a/crates/task-system/src/worker/run.rs +++ b/crates/task-system/src/worker/run.rs @@ -5,12 +5,16 @@ use futures::StreamExt; use futures_concurrency::stream::Merge; use tokio::time::{interval_at, Instant}; use tokio_stream::wrappers::IntervalStream; -use tracing::{error, warn}; +use tracing::{debug, error, warn}; use super::{ - 
super::{error::RunError, message::WorkerMessage, system::SystemComm}, + super::{ + error::RunError, + message::{StoleTaskMessage, TaskOutputMessage, WorkerMessage}, + system::SystemComm, + }, runner::Runner, - RunnerMessage, WorkStealer, WorkerId, ONE_SECOND, + WorkStealer, WorkerId, ONE_SECOND, }; pub(super) async fn run( @@ -21,18 +25,20 @@ pub(super) async fn run( ) { enum StreamMessage { Commands(WorkerMessage), - RunnerMsg(RunnerMessage), + Steal(Option>), + TaskOutput(TaskOutputMessage), IdleCheck, } - let (mut runner, runner_rx) = Runner::new(id, work_stealer, system_comm); + let (mut runner, stole_task_rx, task_output_rx) = Runner::new(id, work_stealer, system_comm); let mut idle_checker_interval = interval_at(Instant::now(), ONE_SECOND); idle_checker_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); let mut msg_stream = pin!(( msgs_rx.map(StreamMessage::Commands), - runner_rx.map(StreamMessage::RunnerMsg), + stole_task_rx.map(StreamMessage::Steal), + task_output_rx.map(StreamMessage::TaskOutput), IntervalStream::new(idle_checker_interval).map(|_| StreamMessage::IdleCheck), ) .merge()); @@ -80,16 +86,26 @@ pub(super) async fn run( return runner.shutdown(tx).await; } - StreamMessage::Commands(WorkerMessage::StealRequest(tx)) => runner.steal_request(tx), + StreamMessage::Commands(WorkerMessage::StealRequest { + ack, + stolen_task_tx, + }) => { + if ack + .send(runner.steal_request(stolen_task_tx).await) + .is_err() + { + debug!("Steal request attempt aborted before sending ack"); + } + } StreamMessage::Commands(WorkerMessage::WakeUp) => runner.wake_up(), // Runner messages - StreamMessage::RunnerMsg(RunnerMessage::TaskOutput(task_id, Ok(output))) => { + StreamMessage::TaskOutput(TaskOutputMessage(task_id, Ok(output))) => { runner.process_task_output(task_id, output).await; } - StreamMessage::RunnerMsg(RunnerMessage::TaskOutput(task_id, Err(()))) => { + StreamMessage::TaskOutput(TaskOutputMessage(task_id, Err(()))) => { error!("Task failed "); runner.clean_suspended_task(task_id); @@ -97,8 +113,8 @@ pub(super) async fn run( runner.dispatch_next_task(task_id).await; } - StreamMessage::RunnerMsg(RunnerMessage::StoleTask(maybe_new_task)) => { - runner.process_stolen_task(maybe_new_task).await; + StreamMessage::Steal(maybe_stolen_task) => { + runner.process_stolen_task(maybe_stolen_task).await; } // Idle checking to steal some work diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index 5e680ceba2a1..675204c9fa55 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -11,7 +11,7 @@ use std::{ use async_channel as chan; use futures::{FutureExt, StreamExt}; -use futures_concurrency::future::Race; +use futures_concurrency::{future::Race, stream::Merge}; use tokio::{ spawn, sync::oneshot, @@ -23,13 +23,14 @@ use tracing::{debug, error, trace, warn}; use super::{ super::{ error::{RunError, SystemError}, + message::{StoleTaskMessage, TaskOutputMessage}, system::SystemComm, task::{ - ExecStatus, InternalTaskExecStatus, Interrupter, Task, TaskId, TaskOutput, TaskStatus, - TaskWorkState, TaskWorktable, + ExecStatus, InternalTaskExecStatus, Interrupter, PanicOnSenderDrop, Task, TaskId, + TaskOutput, TaskStatus, TaskWorkState, TaskWorktable, }, }, - RunnerMessage, TaskRunnerOutput, WorkStealer, WorkerId, ONE_SECOND, + TaskRunnerOutput, WorkStealer, WorkerId, ONE_SECOND, }; const TEN_SECONDS: Duration = Duration::from_secs(10); @@ -75,16 +76,9 @@ struct RunningTask { fn 
dispatch_steal_request( worker_id: WorkerId, work_stealer: WorkStealer, - runner_tx: chan::Sender>, + stole_task_tx: chan::Sender>>, ) -> JoinHandle<()> { - spawn(async move { - runner_tx - .send(RunnerMessage::StoleTask( - work_stealer.steal(worker_id).await, - )) - .await - .expect("runner channel closed before send stolen task"); - }) + spawn(async move { work_stealer.steal(worker_id, &stole_task_tx).await }) } enum WaitingSuspendedTask { @@ -111,21 +105,30 @@ pub(super) struct Runner { is_idle: bool, waiting_suspension: WaitingSuspendedTask, abort_and_suspend_map: HashMap, - msgs_tx: chan::Sender>, + stole_task_tx: chan::Sender>>, + task_output_tx: chan::Sender>, current_task_handle: Option, - suspend_on_shutdown_rx: chan::Receiver>, + suspend_on_shutdown_stole_task_rx: chan::Receiver>>, + suspend_on_shutdown_task_output_rx: chan::Receiver>, current_steal_task_handle: Option>, last_steal_attempt_at: Instant, steal_attempts_count: u32, } +type RunnerCreate = ( + Runner, + chan::Receiver>>, + chan::Receiver>, +); + impl Runner { pub(super) fn new( worker_id: WorkerId, work_stealer: WorkStealer, system_comm: SystemComm, - ) -> (Self, chan::Receiver>) { - let (runner_tx, runner_rx) = chan::bounded(8); + ) -> RunnerCreate { + let (stolen_task_tx, stolen_task_rx) = chan::bounded(2); + let (task_output_tx, task_output_rx) = chan::bounded(8); ( Self { @@ -141,14 +144,17 @@ impl Runner { is_idle: true, waiting_suspension: WaitingSuspendedTask::None, abort_and_suspend_map: HashMap::with_capacity(ABORT_AND_SUSPEND_MAP_INITIAL_SIZE), - msgs_tx: runner_tx, + stole_task_tx: stolen_task_tx, + task_output_tx, current_task_handle: None, - suspend_on_shutdown_rx: runner_rx.clone(), + suspend_on_shutdown_stole_task_rx: stolen_task_rx.clone(), + suspend_on_shutdown_task_output_rx: task_output_rx.clone(), current_steal_task_handle: None, last_steal_attempt_at: Instant::now(), steal_attempts_count: 0, }, - runner_rx, + stolen_task_rx, + task_output_rx, ) } @@ -190,7 +196,7 @@ impl Runner { let handle = spawn(run_single_task( self.worker_id, task_work_state, - self.msgs_tx.clone(), + self.task_output_tx.clone(), suspend_rx, abort_rx, )); @@ -671,9 +677,11 @@ impl Runner { priority_tasks, is_idle, abort_and_suspend_map, - msgs_tx: runner_tx, + stole_task_tx: stolen_task_tx, + task_output_tx, mut current_task_handle, - suspend_on_shutdown_rx, + suspend_on_shutdown_stole_task_rx, + suspend_on_shutdown_task_output_rx, .. 
} = self; @@ -705,10 +713,15 @@ impl Runner { error!("Task failed to join: {e:#?}"); } - runner_tx.close(); + stolen_task_tx.close(); + task_output_tx.close(); - Self::process_tasks_being_suspended_on_shutdown(worker_id, suspend_on_shutdown_rx) - .await; + Self::process_tasks_being_suspended_on_shutdown( + worker_id, + suspend_on_shutdown_stole_task_rx, + suspend_on_shutdown_task_output_rx, + ) + .await; } priority_tasks @@ -734,13 +747,23 @@ impl Runner { async fn process_tasks_being_suspended_on_shutdown( worker_id: WorkerId, - suspend_on_shutdown_rx: chan::Receiver>, + suspend_on_shutdown_stole_task_rx: chan::Receiver>>, + suspend_on_shutdown_task_output_rx: chan::Receiver>, ) { - let mut suspend_on_shutdown_rx = pin!(suspend_on_shutdown_rx); + enum StreamMessage { + Output(TaskOutputMessage), + Steal(Option>), + } + + let mut msg_stream = pin!(( + suspend_on_shutdown_stole_task_rx.map(StreamMessage::Steal), + suspend_on_shutdown_task_output_rx.map(StreamMessage::Output), + ) + .merge()); - while let Some(runner_msg) = suspend_on_shutdown_rx.next().await { - match runner_msg { - RunnerMessage::TaskOutput(task_id, res) => match res { + while let Some(msg) = msg_stream.next().await { + match msg { + StreamMessage::Output(TaskOutputMessage(task_id, res)) => match res { Ok(TaskRunnerOutput { task_work_state, status, @@ -768,7 +791,15 @@ impl Runner { } }, - RunnerMessage::StoleTask(Some(task_work_state)) => { + StreamMessage::Steal(Some(StoleTaskMessage(task_work_state))) => { + trace!( + "Worker stole task: \ + ", + worker_id, + task_work_state.task.id() + ); + task_work_state.change_worker(worker_id); + send_shutdown_task_response( worker_id, &task_work_state.task.id(), @@ -776,7 +807,7 @@ impl Runner { ); } - RunnerMessage::StoleTask(None) => {} + StreamMessage::Steal(None) => {} } } } @@ -797,28 +828,26 @@ impl Runner { .map(|task| (PendingTaskKind::Normal, task)) } - pub(super) fn steal_request(&mut self, tx: oneshot::Sender>>) { + pub(super) async fn steal_request( + &mut self, + stolen_task_tx: chan::Sender>>, + ) -> bool { trace!("Steal request: ", self.worker_id); if let Some((kind, task_work_state)) = self.get_next_task() { - self.proceed_with_task_to_be_stolen(kind, task_work_state, tx); + self.proceed_with_task_to_be_stolen(kind, task_work_state, stolen_task_tx) + .await } else { trace!("No task to steal: ", self.worker_id); - if tx.send(None).is_err() { - warn!( - "Steal request channel closed before sending no task response: \ - ", - self.worker_id - ); - } + false } } - fn proceed_with_task_to_be_stolen( + async fn proceed_with_task_to_be_stolen( &mut self, kind: PendingTaskKind, task_work_state: TaskWorkState, - tx: oneshot::Sender>>, - ) { + stolen_task_tx: chan::Sender>>, + ) -> bool { let task_id = task_work_state.task.id(); self.task_kinds.remove(&task_id); @@ -827,8 +856,14 @@ impl Runner { self.worker_id ); - if let Err(Some(task_work_state)) = tx.send(Some(task_work_state)) { + if let Err(chan::SendError(Some(StoleTaskMessage(task_work_state)))) = stolen_task_tx + .send(Some(StoleTaskMessage(task_work_state))) + .await + { self.put_back_failed_to_stole_task(task_id, kind, task_work_state); + false + } else { + true } } @@ -868,7 +903,7 @@ impl Runner { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, self.work_stealer.clone(), - self.msgs_tx.clone(), + self.stole_task_tx.clone(), )); } else { trace!( @@ -946,7 +981,7 @@ impl Runner { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, self.work_stealer.clone(), - 
self.msgs_tx.clone(), + self.stole_task_tx.clone(), )); } else { trace!( @@ -1046,7 +1081,7 @@ impl Runner { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, self.work_stealer.clone(), - self.msgs_tx.clone(), + self.stole_task_tx.clone(), )); self.last_steal_attempt_at = Instant::now(); } else { @@ -1096,14 +1131,17 @@ impl Runner { } } - pub(super) async fn process_stolen_task(&mut self, maybe_new_task: Option>) { + pub(super) async fn process_stolen_task( + &mut self, + maybe_new_task: Option>, + ) { if let Some(steal_task_handle) = self.current_steal_task_handle.take() { if let Err(e) = steal_task_handle.await { error!("Steal task failed to join: {e:#?}"); } } - if let Some(task_work_state) = maybe_new_task { + if let Some(StoleTaskMessage(task_work_state)) = maybe_new_task { self.system_comm.working_report(self.worker_id).await; trace!( "Stolen task: ", @@ -1237,7 +1275,7 @@ fn handle_task_suspension( type PartialTaskWorkState = ( TaskId, Arc, - oneshot::Sender, SystemError>>, + PanicOnSenderDrop, Arc, ); @@ -1246,21 +1284,22 @@ async fn emit_task_completed_message( run_task_output: RunTaskOutput, has_suspended: Arc, (task_id, worktable, done_tx, interrupter): PartialTaskWorkState, - runner_tx: chan::Sender>, + task_output_tx: chan::Sender>, ) { match run_task_output { (task, Ok(res)) => { trace!( "Task completed ok: " ); - runner_tx - .send(RunnerMessage::TaskOutput(task_id, { + task_output_tx + .send(TaskOutputMessage(task_id, { let mut internal_status = res.into(); + let suspended = has_suspended.load(Ordering::SeqCst); - if matches!(internal_status, InternalTaskExecStatus::Paused) - && has_suspended.load(Ordering::Relaxed) - { + if matches!(internal_status, InternalTaskExecStatus::Paused) && suspended { internal_status = InternalTaskExecStatus::Suspend; + } else if matches!(internal_status, InternalTaskExecStatus::Paused) { + debug!("Task completed with status: {internal_status:#?}"); } Ok(TaskRunnerOutput { @@ -1291,8 +1330,8 @@ async fn emit_task_completed_message( error!("Task done channel closed while sending error response"); } - runner_tx - .send(RunnerMessage::TaskOutput(task_id, Err(()))) + task_output_tx + .send(TaskOutputMessage(task_id, Err(()))) .await .expect("Task runner channel closed while sending task output"); } @@ -1307,7 +1346,7 @@ async fn run_single_task( interrupter, done_tx, }: TaskWorkState, - runner_tx: chan::Sender>, + task_output_tx: chan::Sender>, suspend_rx: oneshot::Receiver<()>, abort_rx: oneshot::Receiver>>, ) { @@ -1364,7 +1403,7 @@ async fn run_single_task( run_task_output, has_suspended, (task_id, worktable, done_tx, interrupter), - runner_tx, + task_output_tx, ) .await; } @@ -1375,8 +1414,8 @@ async fn run_single_task( error!("Task done channel closed while sending join error response"); } - if runner_tx - .send(RunnerMessage::TaskOutput(task_id, Err(()))) + if task_output_tx + .send(TaskOutputMessage(task_id, Err(()))) .await .is_err() { @@ -1393,8 +1432,8 @@ async fn run_single_task( error!("Task done channel closed while sending abort error response"); } - if runner_tx - .send(RunnerMessage::TaskOutput(task_id, Err(()))) + if task_output_tx + .send(TaskOutputMessage(task_id, Err(()))) .await .is_err() { @@ -1430,12 +1469,12 @@ fn send_complete_task_response( if done_tx.send(Ok(TaskStatus::Done((*task_id, out)))).is_err() { warn!( "Task done channel closed before sending done response for task: \ - " + " ); } else { trace!( "Emitted task done signal on shutdown: \ - " + " ); } } diff --git a/crates/utils/src/db.rs 
b/crates/utils/src/db.rs index f30a247735e4..edd406127d60 100644 --- a/crates/utils/src/db.rs +++ b/crates/utils/src/db.rs @@ -1,8 +1,11 @@ -use prisma_client_rust::{migrations::*, NewClientError}; -use sd_prisma::prisma::{self, PrismaClient}; +use prisma_client_rust::{ + migrations::{DbPushError, MigrateDeployError}, + NewClientError, +}; +use sd_prisma::prisma::PrismaClient; use thiserror::Error; -/// MigrationError represents an error that occurring while opening a initialising and running migrations on the database. +/// [`MigrationError`] represents an error that can occur while opening, initialising, and running migrations on the database. #[derive(Error, Debug)] pub enum MigrationError { #[error("An error occurred while initialising a new database connection: {0}")] @@ -14,9 +17,9 @@ pub enum MigrationError { DbPushFailed(#[from] DbPushError), } -/// load_and_migrate will load the database from the given path and migrate it to the latest version of the schema. +/// [`load_and_migrate`] will load the database from the given path and migrate it to the latest version of the schema. pub async fn load_and_migrate(db_url: &str) -> Result { - let client = prisma::PrismaClient::_builder() + let client = PrismaClient::_builder() .with_url(db_url.to_string()) .build() .await @@ -57,25 +60,41 @@ pub async fn load_and_migrate(db_url: &str) -> Result u64 { - u64::from_le_bytes(db_inode.try_into().expect("corrupted inode in database")) +/// Reconstructs an inode from its database representation +#[must_use] +pub const fn inode_from_db(db_inode: &[u8]) -> u64 { + u64::from_le_bytes([ + db_inode[0], + db_inode[1], + db_inode[2], + db_inode[3], + db_inode[4], + db_inode[5], + db_inode[6], + db_inode[7], + ]) } +/// Constructs a database representation of an inode +#[must_use] pub fn inode_to_db(inode: u64) -> Vec { inode.to_le_bytes().to_vec() } +#[must_use] pub fn ffmpeg_data_field_to_db(field: i64) -> Vec { field.to_be_bytes().to_vec() } -pub fn ffmpeg_data_field_from_db(field: &[u8]) -> i64 { +#[must_use] +pub const fn ffmpeg_data_field_from_db(field: &[u8]) -> i64 { i64::from_be_bytes([ field[0], field[1], field[2], field[3], field[4], field[5], field[6], field[7], ]) } -pub fn size_in_bytes_from_db(db_size_in_bytes: &[u8]) -> u64 { +#[must_use] +pub const fn size_in_bytes_from_db(db_size_in_bytes: &[u8]) -> u64 { u64::from_be_bytes([ db_size_in_bytes[0], db_size_in_bytes[1], @@ -88,6 +107,7 @@ pub fn size_in_bytes_from_db(db_size_in_bytes: &[u8]) -> u64 { ]) } +#[must_use] pub fn size_in_bytes_to_db(size: u64) -> Vec { size.to_be_bytes().to_vec() } @@ -105,7 +125,7 @@ impl MissingFieldError { impl From for rspc::Error { fn from(value: MissingFieldError) -> Self { - rspc::Error::with_cause( + Self::with_cause( rspc::ErrorCode::InternalServerError, "Missing crucial data in the database".to_string(), value, @@ -122,7 +142,7 @@ pub trait OptionalField: Sized { impl OptionalField for Option { type Out = T; - fn transform(self) -> Option { + fn transform(self) -> Self { self } } From c1e0634a47dab2be6d4729236ee37fa24603a297 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Thu, 16 May 2024 01:07:36 -0300 Subject: [PATCH 11/33] Fixed race condition on task suspend --- .../heavy-lifting/src/job_system/runner.rs | 11 ++++- crates/task-system/src/task.rs | 49 +++++++++++++++++-- crates/task-system/src/worker/runner.rs | 40 +++++++++------ 3 files changed, 82 insertions(+), 18 deletions(-) diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs
index 5e309f119683..c328fae11d47 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -231,6 +231,10 @@ impl> JobSystemRunner usize { + self.handles.len() + } + fn check_if_job_are_running( &self, job_names: Vec, @@ -310,6 +314,8 @@ impl> JobSystemRunner"); + return Ok(()); } @@ -600,7 +606,10 @@ pub(super) async fn run>( break; } - debug!("Waiting for all jobs to complete before shutting down..."); + debug!( + "Waiting for {} jobs to shutdown before shutting down the job system...", + runner.total_jobs() + ); } // Now the runner can shutdown diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index eb35d7049c80..16b94fa22fa0 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -195,6 +195,12 @@ impl Future for InterrupterFuture<'_> { if ack.send(()).is_err() { warn!("TaskInterrupter ack channel closed"); } + if let InternalInterruptionKind::Suspend(has_suspended) = &kind { + has_suspended.store(true, Ordering::SeqCst); + } + + let kind = kind.into(); + this.has_interrupted.store(kind as u8, Ordering::Relaxed); Poll::Ready(kind) } @@ -250,6 +256,12 @@ impl Interrupter { warn!("TaskInterrupter ack channel closed"); } + if let InternalInterruptionKind::Suspend(has_suspended) = &kind { + has_suspended.store(true, Ordering::SeqCst); + } + + let kind = kind.into(); + self.has_interrupted.store(kind as u8, Ordering::Relaxed); Some(kind) @@ -324,9 +336,25 @@ impl InterruptionKind { } } +#[derive(Debug, Clone)] +enum InternalInterruptionKind { + Pause, + Suspend(Arc), + Cancel, +} + +impl From for InterruptionKind { + fn from(kind: InternalInterruptionKind) -> Self { + match kind { + InternalInterruptionKind::Pause | InternalInterruptionKind::Suspend(_) => Self::Pause, + InternalInterruptionKind::Cancel => Self::Cancel, + } + } +} + #[derive(Debug)] pub struct InterruptionRequest { - kind: InterruptionKind, + kind: InternalInterruptionKind, ack: oneshot::Sender<()>, } @@ -613,7 +641,22 @@ impl TaskWorktable { self.interrupt_tx .send(InterruptionRequest { - kind: InterruptionKind::Pause, + kind: InternalInterruptionKind::Pause, + ack: tx, + }) + .await + .expect("Worker channel closed trying to pause task"); + } + + pub async fn suspend(&self, tx: oneshot::Sender<()>, has_suspended: Arc) { + self.is_paused.store(true, Ordering::Relaxed); + self.is_running.store(false, Ordering::Relaxed); + + trace!("Sending pause signal to Interrupter object on task"); + + self.interrupt_tx + .send(InterruptionRequest { + kind: InternalInterruptionKind::Suspend(has_suspended), ack: tx, }) .await @@ -626,7 +669,7 @@ impl TaskWorktable { self.interrupt_tx .send(InterruptionRequest { - kind: InterruptionKind::Cancel, + kind: InternalInterruptionKind::Cancel, ack: tx, }) .await diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index 675204c9fa55..d6d734377aa4 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -687,6 +687,26 @@ impl Runner { if is_idle { trace!("Worker is idle, no tasks to shutdown: "); + assert!( + current_task_handle.is_none(), + "can't shutdown with a running task if we're idle" + ); + assert!( + tasks.is_empty(), + "can't shutdown with pending tasks if we're idle" + ); + assert!( + priority_tasks.is_empty(), + "can't shutdown with priority tasks if we're idle" + ); + assert!( + suspended_task.is_none(), + "can't shutdown with a suspended task if we're idle" + ); + + for 
paused_task in paused_tasks.into_values() { + send_shutdown_task_response(worker_id, &paused_task.task.id(), paused_task); + } } else { trace!("Worker is busy, will shutdown tasks: "); @@ -1247,21 +1267,13 @@ fn handle_task_suspension( trace!("Suspend signal received: "); - // The interrupter only knows about Pause and Cancel commands, we use pause as - // the suspend task feature should be invisible to the user - worktable.pause(tx).await; + worktable.suspend(tx, has_suspended).await; - match rx.await { - Ok(()) => { - trace!("Suspending: "); - has_suspended.store(true, Ordering::Relaxed); - } - Err(_) => { - // The task probably finished before we could suspend it so the channel was dropped - trace!( - "Suspend channel closed: " - ); - } + if rx.await.is_ok() { + trace!("Suspending: "); + } else { + // The task probably finished before we could suspend it so the channel was dropped + trace!("Suspend channel closed: "); } } else { trace!( From a690432cce79ea6fc8dca14629b3214e8227856c Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Thu, 16 May 2024 02:19:46 -0300 Subject: [PATCH 12/33] Some fixes on job progress reporting and save --- .../heavy-lifting/src/file_identifier/job.rs | 8 ++ core/crates/heavy-lifting/src/indexer/job.rs | 22 +++- .../heavy-lifting/src/job_system/runner.rs | 114 ++++++++++-------- .../heavy-lifting/src/media_processor/job.rs | 68 +++++++++-- .../src/media_processor/tasks/thumbnailer.rs | 5 +- core/src/api/jobs.rs | 2 - core/src/context.rs | 30 ++++- 7 files changed, 178 insertions(+), 71 deletions(-) diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 0cc967ca3cfc..0d3480780855 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -271,6 +271,14 @@ impl FileIdentifier { self.metadata.seeking_orphans_time = start.elapsed(); } else { + ctx.progress(vec![ + ProgressUpdate::TaskCount(self.metadata.total_found_orphans), + ProgressUpdate::Message(format!( + "{} files to be identified", + self.metadata.total_found_orphans + )), + ]) + .await; pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume)); } diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index ce27ed137332..aa89f1505c6a 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -450,7 +450,7 @@ impl Indexer { self.metadata.total_tasks += handles.len() as u64; ctx.progress(vec![ - ProgressUpdate::TaskCount(handles.len() as u64), + ProgressUpdate::TaskCount(self.metadata.total_tasks), ProgressUpdate::message(format!( "Found {to_create_count} new files and {to_update_count} to update" )), @@ -551,7 +551,7 @@ impl Indexer { dispatcher: &JobTaskDispatcher, ) -> Result<(), indexer::Error> { // if we don't have any pending task, then this is a fresh job - if self.pending_tasks_on_resume.is_empty() { + let updates = if self.pending_tasks_on_resume.is_empty() { let walker_root_path = Arc::new( get_full_path_from_sub_path( self.location.id, @@ -578,10 +578,26 @@ impl Indexer { .await, ); + self.metadata.total_tasks = 1; + + let updates = vec![ + ProgressUpdate::TaskCount(self.metadata.total_tasks), + ProgressUpdate::Message(format!("Indexing {}", walker_root_path.display())), + ]; + self.walker_root_path = Some(walker_root_path); + + updates } else { pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume)); - } + + vec![ + 
ProgressUpdate::TaskCount(self.metadata.total_tasks), + ProgressUpdate::Message("Resuming tasks".to_string()), + ] + }; + + ctx.progress(updates).await; Ok(()) } diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index c328fae11d47..a4d719d2414d 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -79,6 +79,7 @@ struct JobsWorktables { } pub(super) struct JobSystemRunner> { + on_shutdown_mode: bool, base_dispatcher: BaseTaskDispatcher, handles: HashMap>, worktables: JobsWorktables, @@ -93,6 +94,7 @@ impl> JobSystemRunner)>, ) -> Self { Self { + on_shutdown_mode: false, base_dispatcher, handles: HashMap::with_capacity(JOBS_INITIAL_CAPACITY), worktables: JobsWorktables { @@ -253,6 +255,7 @@ impl> JobSystemRunner, ) -> Result<(), JobSystemError> { let Self { + on_shutdown_mode, handles, worktables, job_outputs_tx, @@ -272,8 +275,8 @@ impl> JobSystemRunner> JobSystemRunner { - let name = handle.ctx.report().await.name; - - let Some(next_jobs) = - serialize_next_jobs_to_shutdown(job_id, job_name, handle.next_jobs).await - else { - return Ok(()); - }; - - worktables - .jobs_to_store_by_ctx_id - .entry(handle.ctx.id()) - .or_default() - .push(StoredJobEntry { - location_id, - root_job: StoredJob { - id: job_id, - name, - serialized_job, - }, - next_jobs, - }); + Ok(ReturnStatus::Shutdown(res)) => { + match res { + Ok(Some(serialized_job)) => { + let name = { + let db = handle.ctx.db(); + let mut report = handle.ctx.report_mut().await; + if let Err(e) = report.update(db).await { + error!("failed to update report on job shutdown: {e:#?}"); + } + report.name + }; + + worktables + .jobs_to_store_by_ctx_id + .entry(handle.ctx.id()) + .or_default() + .push(StoredJobEntry { + location_id, + root_job: StoredJob { + id: job_id, + name, + serialized_job, + }, + next_jobs: serialize_next_jobs_to_shutdown( + job_id, + job_name, + handle.next_jobs, + ) + .await + .unwrap_or_default(), + }); + + debug!("Job was shutdown and serialized: "); + } - debug!("Job was shutdown and serialized: "); + Ok(None) => { + debug!( + "Job was shutdown but didn't returned any serialized data, \ + probably it isn't resumable job: " + ); + } - return Ok(()); - } + Err(e) => { + error!("Failed to serialize job: {e:#?}"); + } + } - Ok(ReturnStatus::Shutdown(Ok(None))) => { - debug!( - "Job was shutdown but didn't returned any serialized data, \ - probably it isn't resumable job: " - ); - return Ok(()); - } + if *on_shutdown_mode && handles.is_empty() { + // Job system is empty and in shutdown mode so we close this channel to finish the shutdown process + job_return_status_tx.close(); + } - Ok(ReturnStatus::Shutdown(Err(e))) => { - error!("Failed to serialize job: {e:#?}"); return Ok(()); } @@ -594,27 +611,26 @@ pub(super) async fn run>( } StreamMessage::RunnerMessage(RunnerMessage::Shutdown) => { + runner.on_shutdown_mode = true; // Consuming all pending return status messages - loop { - while let Ok((job_id, status)) = job_return_status_rx_to_shutdown.try_recv() { - if let Err(e) = runner.process_return_status(job_id, status).await { - error!("Failed to process return status before shutting down: {e:#?}"); - } - } - - if runner.is_empty() { - break; - } + if !runner.is_empty() { + let mut job_return_status_stream = pin!(job_return_status_rx_to_shutdown); debug!( "Waiting for {} jobs to shutdown before shutting down the job system...", runner.total_jobs() ); - } - // Now the runner can shutdown - 
if let Err(e) = runner.save_jobs(store_jobs_file).await { - error!("Failed to save jobs before shutting down: {e:#?}"); + while let Some((job_id, status)) = job_return_status_stream.next().await { + if let Err(e) = runner.process_return_status(job_id, status).await { + error!("Failed to process return status before shutting down: {e:#?}"); + } + } + + // Now the runner can shutdown + if let Err(e) = runner.save_jobs(store_jobs_file).await { + error!("Failed to save jobs before shutting down: {e:#?}"); + } } return; diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 977c6fca3cd8..9ed6df99fe2d 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -38,7 +38,7 @@ use itertools::Itertools; use prisma_client_rust::{raw, PrismaValue}; use serde::{Deserialize, Serialize}; use serde_json::json; -use tracing::{debug, error, warn}; +use tracing::{debug, warn}; use super::{ helpers, @@ -85,6 +85,7 @@ pub struct MediaProcessor { sub_path: Option, regenerate_thumbnails: bool, + total_media_data_extraction_files: u64, total_media_data_extraction_tasks: u64, total_thumbnailer_tasks: u64, total_thumbnailer_files: u64, @@ -209,6 +210,7 @@ impl MediaProcessor { location: Arc::new(location), sub_path, regenerate_thumbnails, + total_media_data_extraction_files: 0, total_media_data_extraction_tasks: 0, total_thumbnailer_tasks: 0, total_thumbnailer_files: 0, @@ -260,6 +262,7 @@ impl MediaProcessor { dispatcher, ) .await?; + self.total_media_data_extraction_files = total_media_data_extraction_files; self.total_media_data_extraction_tasks = task_handles.len() as u64; pending_running_tasks.extend(task_handles); @@ -276,7 +279,7 @@ impl MediaProcessor { .await; // Now we dispatch thumbnailer tasks - let (total_thumbnailer_tasks, task_handles) = dispatch_thumbnailer_tasks( + let (total_thumbnailer_files, task_handles) = dispatch_thumbnailer_tasks( &iso_file_path, self.regenerate_thumbnails, &self.location_path, @@ -284,10 +287,42 @@ impl MediaProcessor { job_ctx, ) .await?; - pending_running_tasks.extend(task_handles); - self.total_thumbnailer_tasks = total_thumbnailer_tasks; + self.total_thumbnailer_tasks = task_handles.len() as u64; + self.total_thumbnailer_files = total_thumbnailer_files; + + pending_running_tasks.extend(task_handles); } else { + let updates = match self.phase { + Phase::MediaDataExtraction => vec![ + ProgressUpdate::TaskCount(self.total_media_data_extraction_files), + ProgressUpdate::CompletedTaskCount( + self.metadata.media_data_metrics.extracted + + self.metadata.media_data_metrics.skipped, + ), + ProgressUpdate::Phase(self.phase.to_string()), + ProgressUpdate::Message(format!( + "Preparing to process {} files in {} chunks", + self.total_media_data_extraction_files, + self.total_media_data_extraction_tasks + )), + ], + Phase::ThumbnailGeneration => vec![ + ProgressUpdate::TaskCount(self.total_thumbnailer_files), + ProgressUpdate::CompletedTaskCount( + self.metadata.thumbnailer_metrics_acc.generated + + self.metadata.thumbnailer_metrics_acc.skipped, + ), + ProgressUpdate::Phase(self.phase.to_string()), + ProgressUpdate::Message(format!( + "Preparing to process {} files in {} chunks", + self.total_thumbnailer_files, self.total_thumbnailer_tasks + )), + ], + }; + + job_ctx.progress(updates).await; + pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume)); } @@ -412,12 +447,20 @@ impl MediaProcessor { self.errors.extend(errors); - 
job_ctx - .progress(vec![ProgressUpdate::CompletedTaskCount( - self.metadata.thumbnailer_metrics_acc.generated - + self.metadata.thumbnailer_metrics_acc.skipped, - )]) - .await; + debug!( + "Processed {}/{} thumbnailer tasks", + self.metadata.thumbnailer_metrics_acc.total_successful_tasks, + self.total_thumbnailer_tasks + ); + + if matches!(self.phase, Phase::ThumbnailGeneration) { + job_ctx + .progress(vec![ProgressUpdate::CompletedTaskCount( + self.metadata.thumbnailer_metrics_acc.generated + + self.metadata.thumbnailer_metrics_acc.skipped, + )]) + .await; + } // if self.total_thumbnailer_tasks // == self.metadata.thumbnailer_metrics_acc.total_successful_tasks @@ -550,6 +593,7 @@ struct SaveState { sub_path: Option, regenerate_thumbnails: bool, + total_media_data_extraction_files: u64, total_media_data_extraction_tasks: u64, total_thumbnailer_tasks: u64, total_thumbnailer_files: u64, @@ -570,6 +614,7 @@ impl SerializableJob for MediaProcessor { location_path, sub_path, regenerate_thumbnails, + total_media_data_extraction_files, total_media_data_extraction_tasks, total_thumbnailer_tasks, total_thumbnailer_files, @@ -585,6 +630,7 @@ impl SerializableJob for MediaProcessor { location_path, sub_path, regenerate_thumbnails, + total_media_data_extraction_files, total_media_data_extraction_tasks, total_thumbnailer_tasks, total_thumbnailer_files, @@ -628,6 +674,7 @@ impl SerializableJob for MediaProcessor { location_path, sub_path, regenerate_thumbnails, + total_media_data_extraction_files, total_media_data_extraction_tasks, total_thumbnailer_tasks, total_thumbnailer_files, @@ -643,6 +690,7 @@ impl SerializableJob for MediaProcessor { location_path, sub_path, regenerate_thumbnails, + total_media_data_extraction_files, total_media_data_extraction_tasks, total_thumbnailer_tasks, total_thumbnailer_files, diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index e410fbeb84e2..6deab2125e83 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -41,11 +41,10 @@ use std::{ use futures::{stream::FuturesUnordered, FutureExt, StreamExt}; use futures_concurrency::future::Race; - use serde::{Deserialize, Serialize}; use specta::Type; use tokio::time::{sleep, Instant}; -use tracing::{error, info}; +use tracing::{error, trace}; use uuid::Uuid; pub type ThumbnailId = u32; @@ -175,7 +174,7 @@ impl Task for Thumbnailer { .sqrt(), ); - info!( + trace!( "{{generated: {generated}, skipped: {skipped}}} thumbnails; \ mean generation time: {mean_generation_time:?} ± {generation_time_std_dev:?}", generated = output.generated, diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 516c67f778e6..8e16ac7024ed 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -158,8 +158,6 @@ pub(crate) fn mount() -> AlphaRouter { let mut groups_vec = groups.into_values().collect::>(); groups_vec.sort_by(|a, b| b.created_at.cmp(&a.created_at)); - tracing::debug!("{groups_vec:#?}"); - Ok(groups_vec) }) }) diff --git a/core/src/context.rs b/core/src/context.rs index b394f680af9d..995e09ac4af8 100644 --- a/core/src/context.rs +++ b/core/src/context.rs @@ -4,18 +4,20 @@ use sd_core_heavy_lifting::{ job_system::report::{Report, Status}, OuterContext, ProgressUpdate, UpdateEvent, }; -use tracing::trace; use std::{ ops::{Deref, DerefMut}, - sync::Arc, + sync::{ + atomic::{AtomicU8, Ordering}, + Arc, + }, }; use chrono::{DateTime, 
Utc}; +use tokio::{spawn, sync::RwLock}; +use tracing::{error, trace}; use uuid::Uuid; -use tokio::sync::RwLock; - #[derive(Clone)] pub struct NodeContext { pub node: Arc, @@ -82,6 +84,7 @@ pub struct JobContext { outer_ctx: OuterCtx, report: Arc>, start_time: DateTime, + report_update_counter: Arc, } impl OuterContext for JobContext { @@ -122,6 +125,7 @@ impl sd_core_heavy_lifting::JobContext< report: Arc::new(RwLock::new(report)), outer_ctx, start_time: Utc::now(), + report_update_counter: Arc::new(AtomicU8::new(0)), } } @@ -174,6 +178,24 @@ impl sd_core_heavy_lifting::JobContext< let library = self.outer_ctx.library(); + let counter = self.report_update_counter.fetch_add(1, Ordering::AcqRel); + + if counter == 50 || counter == 0 { + self.report_update_counter.store(1, Ordering::Release); + + spawn({ + let db = Arc::clone(&library.db); + let mut report = report.clone(); + async move { + if let Err(e) = report.update(&db).await { + error!( + "Failed to update job report on debounced job progress event: {e:#?}" + ); + } + } + }); + } + // emit a CoreEvent library.emit(CoreEvent::JobProgress(JobProgressEvent { id: report.id, From 97744e89ff87424db0a340f6b8c43dd6c33c01fb Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Fri, 17 May 2024 18:55:52 -0300 Subject: [PATCH 13/33] Fixed many race conditions and a hard deadlock Also some progress report polishing --- .../heavy-lifting/src/file_identifier/job.rs | 77 +++++++---- .../file_identifier/tasks/object_processor.rs | 4 +- core/crates/heavy-lifting/src/indexer/job.rs | 13 +- .../heavy-lifting/src/job_system/mod.rs | 17 +++ .../heavy-lifting/src/job_system/runner.rs | 26 +++- .../media_processor/helpers/thumbnailer.rs | 4 +- .../heavy-lifting/src/media_processor/job.rs | 68 +++------- .../heavy-lifting/src/media_processor/mod.rs | 42 ++++++ .../src/media_processor/shallow.rs | 85 +++++++++---- .../src/media_processor/tasks/thumbnailer.rs | 52 +++----- core/src/api/jobs.rs | 9 +- crates/task-system/src/system.rs | 83 +++++++++--- crates/task-system/src/task.rs | 58 +++++++-- crates/task-system/src/worker/mod.rs | 2 +- crates/task-system/src/worker/run.rs | 52 ++++++-- crates/task-system/src/worker/runner.rs | 120 +++++++++++------- packages/client/src/core.ts | 2 +- 17 files changed, 481 insertions(+), 233 deletions(-) diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 0d3480780855..eab71967fa55 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -35,7 +35,7 @@ use futures_concurrency::future::TryJoin; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::{trace, warn}; +use tracing::{debug, error, trace, warn}; use super::{ orphan_path_filters_deep, orphan_path_filters_shallow, @@ -269,10 +269,22 @@ impl FileIdentifier { ) .await?; + // Multiplying by 2 as each batch will have 2 tasks + self.metadata.total_tasks *= 2; + + ctx.progress(vec![ + ProgressUpdate::TaskCount(self.metadata.total_tasks), + ProgressUpdate::Message(format!( + "{} files to be identified", + self.metadata.total_found_orphans + )), + ]) + .await; + self.metadata.seeking_orphans_time = start.elapsed(); } else { ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_found_orphans), + ProgressUpdate::TaskCount(self.metadata.total_tasks), ProgressUpdate::Message(format!( "{} files to be identified", self.metadata.total_found_orphans @@ -336,10 +348,14 @@ impl 
FileIdentifier { dispatcher: &JobTaskDispatcher, ) -> Option> { self.metadata.extract_metadata_time += extract_metadata_time; - self.errors.extend(errors); - if identified_files.is_empty() { - self.metadata.completed_tasks += 1; + if !errors.is_empty() { + error!("Non critical errors while extracting metadata: {errors:#?}"); + self.errors.extend(errors); + } + + let maybe_task = if identified_files.is_empty() { + self.metadata.completed_tasks += 2; // Adding 2 as we will not have an ObjectProcessorTask ctx.progress(vec![ProgressUpdate::CompletedTaskCount( self.metadata.completed_tasks, @@ -348,8 +364,13 @@ impl FileIdentifier { None } else { - ctx.progress_msg(format!("Identified {} files", identified_files.len())) - .await; + self.metadata.completed_tasks += 1; + + ctx.progress(vec![ + ProgressUpdate::CompletedTaskCount(self.metadata.completed_tasks), + ProgressUpdate::Message(format!("Identified {} files", identified_files.len())), + ]) + .await; let with_priority = self.priority_tasks_ids.remove(&task_id); @@ -367,7 +388,14 @@ impl FileIdentifier { } Some(task) - } + }; + + debug!( + "Processed {}/{} file identifier tasks, took: {extract_metadata_time:?}", + self.metadata.completed_tasks, self.metadata.total_tasks, + ); + + maybe_task } async fn process_object_processor_output( @@ -408,6 +436,16 @@ impl FileIdentifier { file_path_ids: file_path_ids_with_new_object, }); } + + debug!( + "Processed {}/{} file identifier tasks, took: {:?}", + self.metadata.completed_tasks, + self.metadata.total_tasks, + assign_cas_ids_time + + fetch_existing_objects_time + + assign_to_existing_object_time + + create_object_time, + ); } async fn dispatch_priority_identifier_tasks( @@ -450,14 +488,7 @@ impl FileIdentifier { *last_orphan_file_path_id = Some(orphan_paths.last().expect("orphan_paths is not empty").id); - ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_found_orphans), - ProgressUpdate::Message(format!( - "{} files to be identified", - self.metadata.total_found_orphans - )), - ]) - .await; + self.metadata.total_tasks += 1; let priority_task = dispatcher .dispatch(ExtractFileMetadataTask::new( @@ -521,14 +552,7 @@ impl FileIdentifier { self.metadata.total_found_orphans += orphan_paths.len() as u64; - ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_found_orphans), - ProgressUpdate::Message(format!( - "{} files to be identified", - self.metadata.total_found_orphans - )), - ]) - .await; + self.metadata.total_tasks += 1; pending_running_tasks.push( dispatcher @@ -579,6 +603,7 @@ pub struct Metadata { created_objects_count: u64, linked_objects_count: u64, completed_tasks: u64, + total_tasks: u64, } impl From for Vec { @@ -594,6 +619,7 @@ impl From for Vec { created_objects_count, linked_objects_count, completed_tasks, + total_tasks, }: Metadata, ) -> Self { vec![ @@ -618,7 +644,8 @@ impl From for Vec { ("total_found_orphans".into(), json!(total_found_orphans)), ("created_objects_count".into(), json!(created_objects_count)), ("linked_objects_count".into(), json!(linked_objects_count)), - ("total_tasks".into(), json!(completed_tasks)), + ("completed_tasks".into(), json!(completed_tasks)), + ("total_tasks".into(), json!(total_tasks)), ])), ] } diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs index bdc826ddc95c..b3cb1343bec6 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs +++ 
b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs @@ -25,7 +25,7 @@ use std::{ use prisma_client_rust::Select; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::{debug, trace}; +use tracing::trace; use uuid::Uuid; use super::IdentifiedFile; @@ -153,7 +153,7 @@ impl Task for ObjectProcessorTask { *assign_to_existing_object_time = start.elapsed(); *linked_objects_count = assigned_file_path_pub_ids.len() as u64; - debug!( + trace!( "Found {} existing Objects, linked file paths to them", existing_objects_by_cas_id.len() ); diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index aa89f1505c6a..509afd333e2c 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -38,7 +38,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::warn; +use tracing::{debug, error, warn}; use super::{ remove_non_existing_file_paths, reverse_update_directories_sizes, @@ -401,7 +401,10 @@ impl Indexer { .map(|WalkedEntry { iso_file_path, .. }| iso_file_path.clone()), ); - self.errors.extend(errors); + if !errors.is_empty() { + error!("Non critical errors while indexing: {errors:#?}"); + self.errors.extend(errors); + } let db_delete_time = Instant::now(); self.metadata.removed_count += @@ -457,6 +460,8 @@ impl Indexer { ]) .await; + debug!("Processed walk task in the indexer, took: {scan_time:?}"); + Ok(handles) } @@ -472,6 +477,8 @@ impl Indexer { self.metadata.db_write_time += save_duration; ctx.progress_msg(format!("Saved {saved_count} files")).await; + + debug!("Processed save task in the indexer, took: {save_duration:?}"); } async fn process_update_output( @@ -487,6 +494,8 @@ impl Indexer { ctx.progress_msg(format!("Updated {updated_count} files")) .await; + + debug!("Processed update task in the indexer, took: {update_duration:?}"); } async fn process_handles( diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs index f0f132fd8313..b50133638645 100644 --- a/core/crates/heavy-lifting/src/job_system/mod.rs +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -219,6 +219,23 @@ impl> JobSystem bool { + let ctx_id = ctx.id(); + + let (ack_tx, ack_rx) = oneshot::channel(); + self.msgs_tx + .send(RunnerMessage::HasActiveJobs { ctx_id, ack_tx }) + .await + .expect("runner msgs channel unexpectedly closed on has active jobs request"); + + ack_rx + .await + .expect("ack channel closed before receiving has active jobs response") + } + pub fn receive_job_outputs( &self, ) -> impl Stream)> { diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index a4d719d2414d..4e153756f5c6 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -68,6 +68,10 @@ pub(super) enum RunnerMessage, }, Shutdown, + HasActiveJobs { + ctx_id: Uuid, + ack_tx: oneshot::Sender, + }, } struct JobsWorktables { @@ -202,6 +206,9 @@ impl> JobSystemRunner> JobSystemRunner> JobSystemRunner bool { + self.handles + .values() + .any(|handle| handle.ctx.id() == ctx_id) + } } async fn serialize_next_jobs_to_shutdown>( @@ -576,6 +592,12 @@ pub(super) async fn run>( .expect("ack channel closed before sending new job response"); } + StreamMessage::RunnerMessage(RunnerMessage::HasActiveJobs { ctx_id, ack_tx }) => { + ack_tx + 
.send(runner.has_active_jobs(ctx_id)) + .expect("ack channel closed before sending has active jobs response"); + } + StreamMessage::RunnerMessage(RunnerMessage::GetActiveReports { ack_tx }) => { ack_tx .send(runner.get_active_reports().await) @@ -647,9 +669,7 @@ pub(super) async fn run>( } // Memory cleanup tick - StreamMessage::CleanMemoryTick => { - runner.clean_memory(); - } + StreamMessage::CleanMemoryTick => runner.clean_memory(), } } } diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs index 0409b529e5bb..6dcb1a6ba5cf 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs @@ -44,8 +44,8 @@ pub const TARGET_PX: f32 = 1_048_576.0; // 1024x1024 /// and is treated as a percentage (so 60% in this case, or it's the same as multiplying by `0.6`). pub const TARGET_QUALITY: f32 = 60.0; -/// How much time we allow for the thumbnail generation process to complete before we give up. -pub const THUMBNAIL_GENERATION_TIMEOUT: Duration = Duration::from_secs(60); +/// How much time we allow for the thumbnailer task to complete before we give up. +pub const THUMBNAILER_TASK_TIMEOUT: Duration = Duration::from_secs(60 * 5); pub fn get_thumbnails_directory(data_directory: impl AsRef) -> PathBuf { data_directory.as_ref().join(THUMBNAIL_CACHE_DIR_NAME) diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 9ed6df99fe2d..0a217b349e52 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -15,10 +15,10 @@ use sd_core_prisma_helpers::file_path_for_media_processor; use sd_core_sync::Manager as SyncManager; use sd_file_ext::extensions::Extension; -use sd_prisma::prisma::{file_path, location, object, PrismaClient}; +use sd_prisma::prisma::{location, PrismaClient}; use sd_task_system::{ AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskOutput, - TaskStatus, + TaskStatus, TaskSystemError, }; use sd_utils::{db::maybe_missing, u64_to_frontend}; @@ -38,7 +38,7 @@ use itertools::Itertools; use prisma_client_rust::{raw, PrismaValue}; use serde::{Deserialize, Serialize}; use serde_json::json; -use tracing::{debug, warn}; +use tracing::{debug, error, warn}; use super::{ helpers, @@ -46,7 +46,7 @@ use super::{ self, media_data_extractor, thumbnailer::{self, NewThumbnailReporter}, }, - NewThumbnailsReporter, BATCH_SIZE, + NewThumbnailsReporter, RawFilePathForMediaProcessor, BATCH_SIZE, }; #[derive(Debug, Clone, Copy, Serialize, Deserialize)] @@ -361,6 +361,7 @@ impl MediaProcessor { } Err(e) => { + error!("Task System error: {e:#?}"); cancel_pending_tasks(&*pending_running_tasks).await; return Some(Err(e.into())); @@ -396,12 +397,16 @@ impl MediaProcessor { self.metadata.media_data_metrics.db_write_time += db_write_time; self.metadata.media_data_metrics.total_successful_tasks += 1; - self.errors.extend(errors); + if !errors.is_empty() { + error!("Non critical errors while extracting media data: {errors:#?}"); + self.errors.extend(errors); + } debug!( - "Processed {}/{} media data extraction tasks", + "Processed {}/{} media data extraction tasks, took: {:?}", self.metadata.media_data_metrics.total_successful_tasks, - self.total_media_data_extraction_tasks + self.total_media_data_extraction_tasks, + db_read_time + filtering_time + extraction_time + 
db_write_time, ); job_ctx .progress(vec![ProgressUpdate::CompletedTaskCount( @@ -445,10 +450,13 @@ impl MediaProcessor { self.metadata.thumbnailer_metrics_acc.std_dev_acc += std_dev_acc; self.metadata.thumbnailer_metrics_acc.total_successful_tasks += 1; - self.errors.extend(errors); + if !errors.is_empty() { + error!("Non critical errors while generating thumbnails: {errors:#?}"); + self.errors.extend(errors); + } debug!( - "Processed {}/{} thumbnailer tasks", + "Processed {}/{} thumbnailer tasks, took: {total_time:?}", self.metadata.thumbnailer_metrics_acc.total_successful_tasks, self.total_thumbnailer_tasks ); @@ -780,46 +788,6 @@ async fn get_all_children_files_by_extensions( parent_iso_file_path: &IsolatedFilePathData<'_>, extensions: &[Extension], ) -> Result, media_processor::Error> { - #[derive(Deserialize)] - struct RawFilePathForMediaProcessor { - id: file_path::id::Type, - materialized_path: file_path::materialized_path::Type, - is_dir: file_path::is_dir::Type, - name: file_path::name::Type, - extension: file_path::extension::Type, - cas_id: file_path::cas_id::Type, - object_id: object::id::Type, - object_pub_id: object::pub_id::Type, - } - - impl From for file_path_for_media_processor::Data { - fn from( - RawFilePathForMediaProcessor { - id, - materialized_path, - is_dir, - name, - extension, - cas_id, - object_id, - object_pub_id, - }: RawFilePathForMediaProcessor, - ) -> Self { - Self { - id, - materialized_path, - is_dir, - name, - extension, - cas_id, - object: Some(file_path_for_media_processor::object::Data { - id: object_id, - pub_id: object_pub_id, - }), - } - } - } - // FIXME: Had to use format! macro because PCR doesn't support IN with Vec for SQLite // We have no data coming from the user, so this is sql injection safe db._query_raw::(raw!( @@ -894,6 +862,8 @@ async fn dispatch_thumbnailer_tasks( .iter() .position(|file_path| file_path.materialized_path != first_materialized_path); + // TODO debug why we have more priority tasks than we should + let non_priority_tasks = different_materialized_path_idx .map(|idx| { file_paths diff --git a/core/crates/heavy-lifting/src/media_processor/mod.rs b/core/crates/heavy-lifting/src/media_processor/mod.rs index 5cf401709f1e..9f480a3f5d3a 100644 --- a/core/crates/heavy-lifting/src/media_processor/mod.rs +++ b/core/crates/heavy-lifting/src/media_processor/mod.rs @@ -2,6 +2,8 @@ use crate::{utils::sub_path, OuterContext, UpdateEvent}; use sd_core_file_path_helper::FilePathError; +use sd_core_prisma_helpers::file_path_for_media_processor; +use sd_prisma::prisma::{file_path, object}; use sd_utils::db::MissingFieldError; use std::fmt; @@ -89,3 +91,43 @@ impl NewThumbnailReporter for NewThumbnailsReporter for file_path_for_media_processor::Data { + fn from( + RawFilePathForMediaProcessor { + id, + materialized_path, + is_dir, + name, + extension, + cas_id, + object_id, + object_pub_id, + }: RawFilePathForMediaProcessor, + ) -> Self { + Self { + id, + materialized_path, + is_dir, + name, + extension, + cas_id, + object: Some(file_path_for_media_processor::object::Data { + id: object_id, + pub_id: object_pub_id, + }), + } + } +} diff --git a/core/crates/heavy-lifting/src/media_processor/shallow.rs b/core/crates/heavy-lifting/src/media_processor/shallow.rs index 7f93ed3ddd61..bdb1515038a4 100644 --- a/core/crates/heavy-lifting/src/media_processor/shallow.rs +++ b/core/crates/heavy-lifting/src/media_processor/shallow.rs @@ -32,7 +32,7 @@ use super::{ self, media_data_extractor, thumbnailer::{self, NewThumbnailReporter}, }, - 
NewThumbnailsReporter, BATCH_SIZE, + NewThumbnailsReporter, RawFilePathForMediaProcessor, BATCH_SIZE, }; #[allow(clippy::missing_panics_doc)] // SAFETY: It doesn't actually panics @@ -69,38 +69,70 @@ pub async fn shallow( let mut errors = vec![]; - let mut futures = dispatch_media_data_extractor_tasks( + let media_data_extraction_tasks = dispatch_media_data_extractor_tasks( ctx.db(), ctx.sync(), &sub_iso_file_path, &location_path, dispatcher, ) - .await? - .into_iter() - .map(CancelTaskOnDrop::new) - .chain( + .await?; + + let total_media_data_extraction_tasks = media_data_extraction_tasks.len(); + + let thumbnailer_tasks = dispatch_thumbnailer_tasks(&sub_iso_file_path, false, &location_path, dispatcher, ctx) - .await? - .into_iter() - .map(CancelTaskOnDrop::new), - ) - .collect::>(); + .await?; + + let total_thumbnailer_tasks = thumbnailer_tasks.len(); + + let mut futures = media_data_extraction_tasks + .into_iter() + .chain(thumbnailer_tasks.into_iter()) + .map(CancelTaskOnDrop::new) + .collect::>(); + + let mut completed_media_data_extraction_tasks = 0; + let mut completed_thumbnailer_tasks = 0; while let Some(res) = futures.next().await { match res { Ok(TaskStatus::Done((_, TaskOutput::Out(out)))) => { if out.is::() { - errors.extend( - out.downcast::() - .expect("just checked") - .errors, + let media_data_extractor::Output { + db_read_time, + filtering_time, + extraction_time, + db_write_time, + errors: new_errors, + .. + } = *out + .downcast::() + .expect("just checked"); + + errors.extend(new_errors); + + completed_media_data_extraction_tasks += 1; + + debug!( + "Media data extraction task {completed_media_data_extraction_tasks}/\ + {total_media_data_extraction_tasks} completed in {:?}", + db_read_time + filtering_time + extraction_time + db_write_time ); } else if out.is::() { - errors.extend( - out.downcast::() - .expect("just checked") - .errors, + let thumbnailer::Output { + total_time, + errors: new_errors, + .. + } = *out.downcast::().expect("just checked"); + + errors.extend(new_errors); + + completed_thumbnailer_tasks += 1; + + debug!( + "Thumbnailer task {completed_thumbnailer_tasks}/{total_thumbnailer_tasks} \ + completed in {total_time:?}", ); } else { unreachable!( @@ -189,14 +221,22 @@ async fn get_files_by_extensions( ) -> Result, media_processor::Error> { // FIXME: Had to use format! 
macro because PCR doesn't support IN with Vec for SQLite // We have no data coming from the user, so this is sql injection safe - db._query_raw(raw!( + db._query_raw::(raw!( &format!( - "SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id + "SELECT + file_path.id, + file_path.materialized_path, + file_path.is_dir, + file_path.name, + file_path.extension, + file_path.cas_id, + object.id as 'object_id', + object.pub_id as 'object_pub_id' FROM file_path + INNER JOIN object ON object.id = file_path.object_id WHERE location_id={{}} AND cas_id IS NOT NULL - AND object_id IS NOT NULL AND LOWER(extension) IN ({}) AND materialized_path = {{}}", extensions @@ -214,6 +254,7 @@ async fn get_files_by_extensions( )) .exec() .await + .map(|raw_files| raw_files.into_iter().map(Into::into).collect()) .map_err(Into::into) } diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index 6deab2125e83..1a0ccc21ef0c 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -12,8 +12,7 @@ use crate::{ media_processor::{ self, helpers::thumbnailer::{ - generate_thumbnail, GenerateThumbnailArgs, GenerationStatus, - THUMBNAIL_GENERATION_TIMEOUT, + generate_thumbnail, GenerateThumbnailArgs, GenerationStatus, THUMBNAILER_TASK_TIMEOUT, }, ThumbKey, ThumbnailKind, }, @@ -43,7 +42,7 @@ use futures::{stream::FuturesUnordered, FutureExt, StreamExt}; use futures_concurrency::future::Race; use serde::{Deserialize, Serialize}; use specta::Type; -use tokio::time::{sleep, Instant}; +use tokio::time::Instant; use tracing::{error, trace}; use uuid::Uuid; @@ -77,7 +76,7 @@ impl Task for Thumbnailer { } fn with_timeout(&self) -> Option { - Some(Duration::from_secs(60 * 5)) // The entire task must not take more than 5 minutes + Some(THUMBNAILER_TASK_TIMEOUT) // The entire task must not take more than this constant } async fn run(&mut self, interrupter: &Interrupter) -> Result { @@ -105,40 +104,27 @@ impl Task for Thumbnailer { let start = Instant::now(); - let mut futures = pin!(thumbnails_to_generate + let futures = thumbnails_to_generate .iter() .map(|(id, generate_args)| { - let path = &generate_args.path; - + generate_thumbnail( + thumbnails_directory_path, + generate_args, + thumbs_kind, + *should_regenerate, + ) + .map(|res| InterruptRace::Processed((*id, res))) + }) + .map(|fut| { ( - generate_thumbnail( - thumbnails_directory_path, - generate_args, - thumbs_kind, - *should_regenerate, - ) - .map(|res| (*id, res)), - sleep(THUMBNAIL_GENERATION_TIMEOUT).map(|()| { - ( - *id, - ( - THUMBNAIL_GENERATION_TIMEOUT, - Err(NonCriticalThumbnailerError::ThumbnailGenerationTimeout( - path.clone(), - )), - ), - ) - }), + fut, + interrupter.into_future().map(InterruptRace::Interrupted), ) .race() - .map(InterruptRace::Processed) }) - .map(|fut| ( - fut, - interrupter.into_future().map(InterruptRace::Interrupted) - ) - .race()) - .collect::>()); + .collect::>(); + + let mut futures = pin!(futures); while let Some(race_output) = futures.next().await { match race_output { @@ -214,8 +200,6 @@ pub enum NonCriticalThumbnailerError { CreateShardDirectory(String), #[error("failed to save thumbnail : {1}", .0.display())] SaveThumbnail(PathBuf, String), - #[error("thumbnail generation timed out ", .0.display())] - ThumbnailGenerationTimeout(PathBuf), } impl Thumbnailer { diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 
8e16ac7024ed..178748de28d0 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -164,7 +164,14 @@ pub(crate) fn mount() -> AlphaRouter { .procedure("isActive", { R.with2(library()) .query(|(node, library), _: ()| async move { - Ok(node.old_jobs.has_active_workers(library.id).await) + let library_id = library.id; + Ok(node + .job_system + .has_active_jobs(NodeContext { + node: Arc::clone(&node), + library, + }) + .await || node.old_jobs.has_active_workers(library_id).await) }) }) .procedure("clear", { diff --git a/crates/task-system/src/system.rs b/crates/task-system/src/system.rs index 7df19aeca5ba..7fea0a030692 100644 --- a/crates/task-system/src/system.rs +++ b/crates/task-system/src/system.rs @@ -163,7 +163,15 @@ impl System { ack, } => { trace!("Task system received a task resume request: "); - workers[worker_id].resume_task(task_id, ack).await; + spawn({ + let workers = Arc::clone(&workers); + async move { + workers[worker_id].resume_task(task_id, ack).await; + } + }); + trace!( + "Task system resumed task: " + ); } SystemMessage::PauseNotRunningTask { @@ -171,10 +179,18 @@ impl System { worker_id, ack, } => { - trace!("Task system received a task resume request: "); - workers[worker_id] - .pause_not_running_task(task_id, ack) - .await; + trace!("Task system received a task pause request: "); + spawn({ + let workers = Arc::clone(&workers); + async move { + workers[worker_id] + .pause_not_running_task(task_id, ack) + .await; + } + }); + trace!( + "Task system paused task: " + ); } SystemMessage::CancelNotRunningTask { @@ -182,10 +198,19 @@ impl System { worker_id, ack, } => { - trace!("Task system received a task resume request: "); - workers[worker_id] - .cancel_not_running_task(task_id, ack) - .await; + trace!("Task system received a task cancel request: "); + spawn({ + let workers = Arc::clone(&workers); + + async move { + workers[worker_id] + .cancel_not_running_task(task_id, ack) + .await; + } + }); + trace!( + "Task system canceled task: " + ); } SystemMessage::ForceAbortion { @@ -197,7 +222,16 @@ impl System { "Task system received a task force abortion request: \ " ); - workers[worker_id].force_task_abortion(task_id, ack).await; + spawn({ + let workers = Arc::clone(&workers); + + async move { + workers[worker_id].force_task_abortion(task_id, ack).await; + } + }); + trace!( + "Task system aborted task: " + ); } SystemMessage::NotifyIdleWorkers { @@ -209,17 +243,26 @@ impl System { " ); - for idx in (0..workers.len()) - .cycle() - .skip(start_from) - .take(usize::min(task_count, workers.len())) - { - if idle_workers[idx].load(Ordering::Relaxed) { - workers[idx].wake().await; - // we don't mark the worker as not idle because we wait for it to - // successfully steal a task and then report it back as active + spawn({ + let workers = Arc::clone(&workers); + let idle_workers = Arc::clone(&idle_workers); + + async move { + for idx in (0..workers.len()) + .cycle() + .skip(start_from) + .take(usize::min(task_count, workers.len())) + { + if idle_workers[idx].load(Ordering::Relaxed) { + trace!("Task system sending wake up request: "); + workers[idx].wake().await; + trace!("Task system waked up worker: "); + // we don't mark the worker as not idle because we wait for it to + // successfully steal a task and then report it back as active + } + } } - } + }); } SystemMessage::ShutdownRequest(tx) => { diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 16b94fa22fa0..10814cc4292c 100644 --- a/crates/task-system/src/task.rs +++ 
b/crates/task-system/src/task.rs @@ -419,7 +419,10 @@ impl TaskRemoteController { let is_canceled = self.worktable.has_canceled.load(Ordering::Relaxed); let is_done = self.worktable.is_done.load(Ordering::Relaxed); - trace!("Received cancel command task: "); + trace!( + "Received cancel command task: ", + self.task_id + ); if !is_canceled && !is_done { if self.worktable.is_running.load(Ordering::Relaxed) { @@ -472,6 +475,7 @@ impl TaskRemoteController { | self.worktable.has_shutdown() | self.worktable.has_aborted() | self.worktable.has_canceled() + | self.worktable.has_failed() } } @@ -585,6 +589,7 @@ pub struct TaskWorktable { has_canceled: AtomicBool, has_aborted: AtomicBool, has_shutdown: AtomicBool, + has_failed: AtomicBool, interrupt_tx: chan::Sender, current_worker_id: AtomicWorkerId, } @@ -599,6 +604,7 @@ impl TaskWorktable { has_canceled: AtomicBool::new(false), has_aborted: AtomicBool::new(false), has_shutdown: AtomicBool::new(false), + has_failed: AtomicBool::new(false), interrupt_tx, current_worker_id: AtomicWorkerId::new(worker_id), } @@ -628,6 +634,11 @@ impl TaskWorktable { self.is_running.store(false, Ordering::Relaxed); } + pub fn set_failed(&self) { + self.has_failed.store(true, Ordering::Relaxed); + self.is_running.store(false, Ordering::Relaxed); + } + pub fn set_shutdown(&self) { self.has_shutdown.store(true, Ordering::Relaxed); self.is_running.store(false, Ordering::Relaxed); @@ -660,20 +671,20 @@ impl TaskWorktable { ack: tx, }) .await - .expect("Worker channel closed trying to pause task"); + .expect("Interrupter channel closed trying to pause task"); } pub async fn cancel(&self, tx: oneshot::Sender<()>) { - self.has_canceled.store(true, Ordering::Relaxed); - self.is_running.store(false, Ordering::Relaxed); - self.interrupt_tx .send(InterruptionRequest { kind: InternalInterruptionKind::Cancel, ack: tx, }) .await - .expect("Worker channel closed trying to pause task"); + .expect("Interrupter channel closed trying to pause task"); + + self.has_canceled.store(true, Ordering::Relaxed); + self.is_running.store(false, Ordering::Relaxed); } pub fn is_done(&self) -> bool { @@ -688,6 +699,10 @@ impl TaskWorktable { self.has_canceled.load(Ordering::Relaxed) } + pub fn has_failed(&self) -> bool { + self.has_failed.load(Ordering::Relaxed) + } + pub fn has_aborted(&self) -> bool { self.has_aborted.load(Ordering::Relaxed) } @@ -706,6 +721,12 @@ pub struct TaskWorkState { } impl TaskWorkState { + #[inline] + pub fn task_id(&self) -> TaskId { + self.task.id() + } + + #[inline] pub fn change_worker(&self, new_worker_id: WorkerId) { self.worktable .current_worker_id @@ -714,20 +735,27 @@ impl TaskWorkState { } #[derive(Debug)] -pub struct PanicOnSenderDrop( - Option, SystemError>>>, -); +pub struct PanicOnSenderDrop { + task_id: TaskId, + maybe_done_tx: Option, SystemError>>>, +} impl PanicOnSenderDrop { - pub fn new(done_tx: oneshot::Sender, SystemError>>) -> Self { - Self(Some(done_tx)) + pub fn new( + task_id: TaskId, + done_tx: oneshot::Sender, SystemError>>, + ) -> Self { + Self { + task_id, + maybe_done_tx: Some(done_tx), + } } pub fn send( mut self, res: Result, SystemError>, ) -> Result<(), Result, SystemError>> { - self.0 + self.maybe_done_tx .take() .expect("tried to send a task output twice to the same task handle") .send(res) @@ -737,8 +765,12 @@ impl PanicOnSenderDrop { impl Drop for PanicOnSenderDrop { fn drop(&mut self) { assert!( - self.0.is_none(), + self.maybe_done_tx.is_none(), "TaskHandle done channel dropped before sending a result" ); + trace!( + "TaskWorkState 
of task successfully dropped", + self.task_id + ); } } diff --git a/crates/task-system/src/worker/mod.rs b/crates/task-system/src/worker/mod.rs index 7ba9317d3e51..b238c5b0e206 100644 --- a/crates/task-system/src/worker/mod.rs +++ b/crates/task-system/src/worker/mod.rs @@ -122,7 +122,7 @@ impl Worker { task: new_task, worktable: Arc::clone(&worktable), interrupter: Arc::new(Interrupter::new(interrupt_rx)), - done_tx: PanicOnSenderDrop::new(done_tx), + done_tx: PanicOnSenderDrop::new(task_id, done_tx), })) .await .expect("Worker channel closed trying to add task"); diff --git a/crates/task-system/src/worker/run.rs b/crates/task-system/src/worker/run.rs index b9910c8496c2..531c3c0a0c71 100644 --- a/crates/task-system/src/worker/run.rs +++ b/crates/task-system/src/worker/run.rs @@ -5,7 +5,7 @@ use futures::StreamExt; use futures_concurrency::stream::Merge; use tokio::time::{interval_at, Instant}; use tokio_stream::wrappers::IntervalStream; -use tracing::{debug, error, warn}; +use tracing::{debug, error, trace, warn}; use super::{ super::{ @@ -17,19 +17,19 @@ use super::{ WorkStealer, WorkerId, ONE_SECOND, }; +enum StreamMessage { + Commands(WorkerMessage), + Steal(Option>), + TaskOutput(TaskOutputMessage), + IdleCheck, +} + pub(super) async fn run( id: WorkerId, system_comm: SystemComm, work_stealer: WorkStealer, msgs_rx: chan::Receiver>, ) { - enum StreamMessage { - Commands(WorkerMessage), - Steal(Option>), - TaskOutput(TaskOutputMessage), - IdleCheck, - } - let (mut runner, stole_task_rx, task_output_rx) = Runner::new(id, work_stealer, system_comm); let mut idle_checker_interval = interval_at(Instant::now(), ONE_SECOND); @@ -47,8 +47,11 @@ pub(super) async fn run( match msg { // Worker messages StreamMessage::Commands(WorkerMessage::NewTask(task_work_state)) => { + let task_id = task_work_state.task_id(); runner.abort_steal_task(); + trace!("New task received: "); runner.new_task(task_work_state).await; + trace!("New task added: "); } StreamMessage::Commands(WorkerMessage::TaskCountRequest(tx)) => { @@ -58,9 +61,11 @@ pub(super) async fn run( } StreamMessage::Commands(WorkerMessage::ResumeTask { task_id, ack }) => { + trace!("Resume task request received: "); if ack.send(runner.resume_task(task_id).await).is_err() { warn!("Resume task channel closed before sending ack"); } + trace!("Resumed task: "); } StreamMessage::Commands(WorkerMessage::PauseNotRunningTask { task_id, ack }) => { @@ -70,16 +75,23 @@ pub(super) async fn run( } StreamMessage::Commands(WorkerMessage::CancelNotRunningTask { task_id, ack }) => { - runner.cancel_not_running_task(task_id); + runner.cancel_not_running_task(&task_id); if ack.send(()).is_err() { warn!("Resume task channel closed before sending ack"); } } StreamMessage::Commands(WorkerMessage::ForceAbortion { task_id, ack }) => { - if ack.send(runner.force_task_abortion(task_id).await).is_err() { + trace!( + "Force abortion task request received: " + ); + if ack + .send(runner.force_task_abortion(&task_id).await) + .is_err() + { warn!("Force abortion channel closed before sending ack"); } + trace!("Force aborted task: "); } StreamMessage::Commands(WorkerMessage::ShutdownRequest(tx)) => { @@ -90,31 +102,43 @@ pub(super) async fn run( ack, stolen_task_tx, }) => { + trace!("Steal task request received: "); if ack .send(runner.steal_request(stolen_task_tx).await) .is_err() { debug!("Steal request attempt aborted before sending ack"); } + trace!("Steal task request completed: "); } StreamMessage::Commands(WorkerMessage::WakeUp) => runner.wake_up(), // Runner messages 
StreamMessage::TaskOutput(TaskOutputMessage(task_id, Ok(output))) => { - runner.process_task_output(task_id, output).await; + trace!( + "Process task output request received: " + ); + runner.process_task_output(&task_id, output).await; + trace!("Processed task output: "); } StreamMessage::TaskOutput(TaskOutputMessage(task_id, Err(()))) => { error!("Task failed "); - runner.clean_suspended_task(task_id); - - runner.dispatch_next_task(task_id).await; + runner.clear_errored_task(task_id).await; + trace!("Failed task cleared: "); } StreamMessage::Steal(maybe_stolen_task) => { + let maybe_task_id = maybe_stolen_task + .as_ref() + .map(|StoleTaskMessage(task_work_state)| task_work_state.task_id()); + trace!("Received stolen task request: "); runner.process_stolen_task(maybe_stolen_task).await; + trace!( + "Processed stolen task: " + ); } // Idle checking to steal some work diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index d6d734377aa4..7175523d83e7 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -358,14 +358,14 @@ impl Runner { false } - pub(super) fn cancel_not_running_task(&mut self, task_id: TaskId) { + pub(super) fn cancel_not_running_task(&mut self, task_id: &TaskId) { trace!( "Cancel not running task request: ", self.worker_id ); if let Some(current_task) = &self.current_task_handle { - if current_task.task_id == task_id { + if current_task.task_id == *task_id { trace!( "Task began to run before we managed to cancel it, run function will cancel it: \ ", @@ -375,8 +375,12 @@ impl Runner { } } + // We only remove from task_kinds as if the task is already running, it will be removed when we + // process its cancelled output later + self.task_kinds.remove(task_id); + if let Some(suspended_task) = &self.suspended_task { - if suspended_task.task.id() == task_id { + if suspended_task.task.id() == *task_id { trace!( "Task is already suspended but will be canceled: ", self.worker_id @@ -396,11 +400,12 @@ impl Runner { // If the task is not found, then it's possible that the user already canceled it but still have the handle } - fn cancel_task_from_queues(&mut self, task_id: TaskId) { + #[inline] + fn cancel_task_from_queues(&mut self, task_id: &TaskId) { if let Some(index) = self .priority_tasks .iter() - .position(|task_work_state| task_work_state.task.id() == task_id) + .position(|task_work_state| task_work_state.task.id() == *task_id) { send_cancel_task_response( self.worker_id, @@ -415,7 +420,7 @@ impl Runner { if let Some(index) = self .tasks .iter() - .position(|task_work_state| task_work_state.task.id() == task_id) + .position(|task_work_state| task_work_state.task.id() == *task_id) { send_cancel_task_response( self.worker_id, @@ -566,10 +571,10 @@ impl Runner { pub(super) async fn force_task_abortion( &mut self, - task_id: uuid::Uuid, + task_id: &TaskId, ) -> Result<(), SystemError> { if let Some(AbortAndSuspendSignalers { abort_tx, .. 
}) = - self.abort_and_suspend_map.remove(&task_id) + self.abort_and_suspend_map.remove(task_id) { let (tx, rx) = oneshot::channel(); @@ -587,7 +592,7 @@ impl Runner { // If the sender was dropped, then the task finished before we could // abort it which is fine Ok(Err(_)) => Ok(()), - Err(_) => Err(SystemError::TaskForcedAbortTimeout(task_id)), + Err(_) => Err(SystemError::TaskForcedAbortTimeout(*task_id)), } } } else { @@ -597,7 +602,7 @@ impl Runner { ); if let Some(current_task) = &self.current_task_handle { - if current_task.task_id == task_id { + if current_task.task_id == *task_id { trace!( "Task began to run before we managed to abort it, run function will abort it: \ ", @@ -607,8 +612,10 @@ impl Runner { } } + self.task_kinds.remove(task_id); + if let Some(suspended_task) = &self.suspended_task { - if suspended_task.task.id() == task_id { + if suspended_task.task.id() == *task_id { trace!( "Task is already suspended but will be force aborted: ", self.worker_id @@ -626,7 +633,7 @@ impl Runner { if let Some(index) = self .priority_tasks .iter() - .position(|task_work_state| task_work_state.task.id() == task_id) + .position(|task_work_state| task_work_state.task.id() == *task_id) { send_forced_abortion_task_response( self.worker_id, @@ -641,7 +648,7 @@ impl Runner { if let Some(index) = self .tasks .iter() - .position(|task_work_state| task_work_state.task.id() == task_id) + .position(|task_work_state| task_work_state.task.id() == *task_id) { send_forced_abortion_task_response( self.worker_id, @@ -940,14 +947,14 @@ impl Runner { } #[inline] - pub(super) async fn dispatch_next_task(&mut self, finished_task_id: TaskId) { + pub(super) async fn dispatch_next_task(&mut self, finished_task_id: &TaskId) { trace!( "Task finished and will try to process a new task: \ ", self.worker_id ); - self.abort_and_suspend_map.remove(&finished_task_id); + self.abort_and_suspend_map.remove(finished_task_id); let RunningTask { task_id: old_task_id, @@ -959,7 +966,7 @@ impl Runner { .take() .expect("Task handle missing, but task output received"); - assert_eq!(finished_task_id, old_task_id, "Task output id mismatch"); + assert_eq!(*finished_task_id, old_task_id, "Task output id mismatch"); trace!( "Waiting task handle: ", @@ -997,6 +1004,11 @@ impl Runner { self.is_idle = true; self.system_comm.idle_report(self.worker_id).await; + trace!( + "Worker reported idle status: ", + self.worker_id + ); + if self.current_steal_task_handle.is_none() { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, @@ -1014,7 +1026,7 @@ impl Runner { pub(super) async fn process_task_output( &mut self, - task_id: TaskId, + task_id: &TaskId, TaskRunnerOutput { task_work_state, status, @@ -1022,12 +1034,12 @@ impl Runner { ) { match status { InternalTaskExecStatus::Done(out) => { - self.task_kinds.remove(&task_id); - send_complete_task_response(self.worker_id, &task_id, task_work_state, out); + self.task_kinds.remove(task_id); + send_complete_task_response(self.worker_id, task_id, task_work_state, out); } InternalTaskExecStatus::Paused => { - self.paused_tasks.insert(task_id, task_work_state); + self.paused_tasks.insert(*task_id, task_work_state); trace!( "Task paused: ", self.worker_id @@ -1035,12 +1047,12 @@ impl Runner { } InternalTaskExecStatus::Canceled => { - self.task_kinds.remove(&task_id); + self.task_kinds.remove(task_id); send_cancel_task_response(self.worker_id, task_work_state); } InternalTaskExecStatus::Error(e) => { - self.task_kinds.remove(&task_id); + self.task_kinds.remove(task_id); 
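// Illustrative sketch (not part of the surrounding diff): the `ack` one-shot senders
// threaded through this file — and the new `HasActiveJobs { ctx_id, ack_tx }` runner
// message added by this patch series — all follow the same request/response idiom:
// pack a one-shot sender into the request, let the processing loop answer on it, and
// await the receiver on the calling side. A self-contained version using tokio
// channels; the real code uses the crate's own channel aliases, so every name below
// is only representative:
async fn oneshot_ack_sketch() {
	use tokio::sync::{mpsc, oneshot};

	enum Request {
		HasActiveJobs { ack_tx: oneshot::Sender<bool> },
	}

	let (requests_tx, mut requests_rx) = mpsc::channel::<Request>(8);

	// "Runner" side: answer every request through its embedded ack channel.
	let runner = tokio::spawn(async move {
		while let Some(Request::HasActiveJobs { ack_tx }) = requests_rx.recv().await {
			// Whatever state inspection the real runner performs would happen here.
			let _ = ack_tx.send(true);
		}
	});

	// "Caller" side: send the request, then await the ack.
	let (ack_tx, ack_rx) = oneshot::channel();
	requests_tx
		.send(Request::HasActiveJobs { ack_tx })
		.await
		.expect("request channel unexpectedly closed");
	let has_active_jobs = ack_rx.await.expect("ack channel closed before answering");
	assert!(has_active_jobs);

	drop(requests_tx); // closing the request channel lets the loop above finish
	runner.await.expect("runner task panicked");
}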
send_error_task_response(self.worker_id, task_work_state, e); } @@ -1121,7 +1133,7 @@ impl Runner { } if self.task_kinds.capacity() > TASK_QUEUE_INITIAL_SIZE { - assert_eq!(self.task_kinds.len(), self.paused_tasks.len()); + assert_eq!(self.task_kinds.len(), self.paused_tasks.len(), "If we're idle, the number of task_kinds MUST be equal to the number of paused tasks"); self.task_kinds.shrink_to(TASK_QUEUE_INITIAL_SIZE); } @@ -1175,9 +1187,9 @@ impl Runner { } } - pub(crate) fn clean_suspended_task(&mut self, task_id: uuid::Uuid) { + pub(crate) fn clean_suspended_task(&mut self, task_id: &TaskId) { match self.waiting_suspension { - WaitingSuspendedTask::Task(waiting_task_id) if waiting_task_id == task_id => { + WaitingSuspendedTask::Task(waiting_task_id) if waiting_task_id == *task_id => { trace!( "Task was suspended and will be cleaned: ", self.worker_id @@ -1193,6 +1205,14 @@ impl Runner { WaitingSuspendedTask::None => {} } } + + pub(crate) async fn clear_errored_task(&mut self, task_id: TaskId) { + self.task_kinds.remove(&task_id); + + self.clean_suspended_task(&task_id); + + self.dispatch_next_task(&task_id).await; + } } type RunTaskOutput = (Box>, Result, SystemError>); @@ -1209,25 +1229,33 @@ fn handle_run_task_attempt( let already_canceled = worktable.has_canceled(); let already_aborted = worktable.has_aborted(); - async move { - if already_paused { - trace!( - "Task was paused before running: " - ); + let early_result = if already_paused { + trace!( + "Task was paused before running: " + ); - (task, Ok(Ok(ExecStatus::Paused))) - } else if already_canceled { - trace!( - "Task was canceled before running: " - ); + Some(Ok(Ok(ExecStatus::Paused))) + } else if already_canceled { + trace!( + "Task was canceled before running: " + ); - (task, Ok(Ok(ExecStatus::Canceled))) - } else if already_aborted { - trace!( - "Task was aborted before running: " - ); + Some(Ok(Ok(ExecStatus::Canceled))) + } else if already_aborted { + trace!( + "Task was aborted before running: " + ); + + Some(Err(SystemError::TaskAborted(task_id))) + } else { + // We can mark that the task has actually started now + worktable.set_started(); + None + }; - (task, Err(SystemError::TaskAborted(task_id))) + async move { + if let Some(res) = early_result { + (task, res) } else { let run_result = if let Some(timeout_duration) = task.with_timeout() { (task.run(&interrupter).map(Ok), async move { @@ -1329,12 +1357,14 @@ async fn emit_task_completed_message( } (_, Err(e)) => { - trace!("Task had an error: "); + error!("Task had an error: : {e:#?}"); if done_tx .send(if matches!(e, SystemError::TaskAborted(_)) { + worktable.set_aborted(); Ok(TaskStatus::ForcedAbortion) } else { + worktable.set_failed(); Err(e) }) .is_err() @@ -1369,8 +1399,6 @@ async fn run_single_task( let task_id = task.id(); - worktable.set_started(); - trace!("Running task: "); let handle = handle_run_task_attempt( @@ -1426,6 +1454,8 @@ async fn run_single_task( error!("Task done channel closed while sending join error response"); } + worktable.set_failed(); + if task_output_tx .send(TaskOutputMessage(task_id, Err(()))) .await @@ -1444,6 +1474,8 @@ async fn run_single_task( error!("Task done channel closed while sending abort error response"); } + worktable.set_aborted(); + if task_output_tx .send(TaskOutputMessage(task_id, Err(()))) .await diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 363fee3ce52e..eff104b76db5 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -491,7 +491,7 @@ export type 
NonCriticalMediaDataExtractorError = { FailedToExtractImageMediaData export type NonCriticalMediaProcessorError = { media_data_extractor: NonCriticalMediaDataExtractorError } | { thumbnailer: NonCriticalThumbnailerError } -export type NonCriticalThumbnailerError = { MissingCasId: number } | { FailedToExtractIsolatedFilePathData: [number, string] } | { VideoThumbnailGenerationFailed: [string, string] } | { FormatImage: [string, string] } | { WebPEncoding: [string, string] } | { PanicWhileGeneratingThumbnail: [string, string] } | { CreateShardDirectory: string } | { SaveThumbnail: [string, string] } | { ThumbnailGenerationTimeout: string } +export type NonCriticalThumbnailerError = { MissingCasId: number } | { FailedToExtractIsolatedFilePathData: [number, string] } | { VideoThumbnailGenerationFailed: [string, string] } | { FormatImage: [string, string] } | { WebPEncoding: [string, string] } | { PanicWhileGeneratingThumbnail: [string, string] } | { CreateShardDirectory: string } | { SaveThumbnail: [string, string] } export type NonIndexedPathItem = { path: string; name: string; extension: string; kind: number; is_dir: boolean; date_created: string; date_modified: string; size_in_bytes_bytes: number[]; hidden: boolean } From 29990de410c730f5b0fd9881dc90d4cf726d70b6 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Fri, 17 May 2024 20:16:14 -0300 Subject: [PATCH 14/33] Ignore .ts and .mts video files for now --- .../helpers/ffmpeg_media_data.rs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs index 02ec1e38aaa3..78ebfd1637c6 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs @@ -73,22 +73,22 @@ pub const fn can_extract_for_audio(audio_extension: AudioExtension) -> bool { #[must_use] pub const fn can_extract_for_video(video_extension: VideoExtension) -> bool { use VideoExtension::{ - Asf, Avi, Avifs, F4v, Flv, Hevc, M2ts, M2v, M4v, Mjpeg, Mkv, Mov, Mp4, Mpe, Mpeg, Mpg, Mts, - Mxf, Ogv, Qt, Swf, Ts, Vob, Webm, Wm, Wmv, Wtv, _3gp, + Asf, Avi, Avifs, F4v, Flv, Hevc, M2ts, M2v, M4v, Mjpeg, Mkv, Mov, Mp4, Mpe, Mpeg, Mpg, Mxf, + Ogv, Qt, Swf, Vob, Webm, Wm, Wmv, Wtv, _3gp, }; matches!( video_extension, Avi | Avifs | Qt | Mov | Swf - | Mjpeg | Ts | Mts - | Mpeg | Mxf | M2v - | Mpg | Mpe | M2ts - | Flv | Wm | _3gp - | M4v | Wmv | Asf - | Mp4 | Webm | Mkv - | Vob | Ogv | Wtv - | Hevc | F4v + | Mjpeg | Mpeg + | Mxf | M2v | Mpg + | Mpe | M2ts | Flv + | Wm | _3gp | M4v + | Wmv | Asf | Mp4 + | Webm | Mkv | Vob + | Ogv | Wtv | Hevc + | F4v // | Ts | Mts TODO: Uncomment when we start using magic instead of extension ) } From 63da1ea091851869befae8d00c449d5650c6735f Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Mon, 20 May 2024 15:19:15 -0300 Subject: [PATCH 15/33] Some better logs --- core/crates/heavy-lifting/src/indexer/job.rs | 23 +- core/src/location/manager/watcher/utils.rs | 11 +- packages/client/src/core.ts | 330 +++++++++---------- 3 files changed, 191 insertions(+), 173 deletions(-) diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index 509afd333e2c..3c0d54321d38 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -362,6 +362,8 @@ impl Indexer { let (to_create_count, 
to_update_count) = (to_create.len(), to_update.len()); + debug!("Scanned {directory_iso_file_path} in {scan_time:?}"); + *self .iso_paths_and_sizes .entry(directory_iso_file_path) @@ -447,6 +449,15 @@ impl Indexer { }) .collect::>(); + debug!( + "Dispatching more ({}W/{}S/{}U) tasks, completed ({}/{})", + handles.len(), + save_tasks.len(), + update_tasks.len(), + self.metadata.completed_tasks, + self.metadata.total_tasks + ); + handles.extend(dispatcher.dispatch_many(save_tasks).await); handles.extend(dispatcher.dispatch_many(update_tasks).await); @@ -460,8 +471,6 @@ impl Indexer { ]) .await; - debug!("Processed walk task in the indexer, took: {scan_time:?}"); - Ok(handles) } @@ -478,7 +487,10 @@ impl Indexer { ctx.progress_msg(format!("Saved {saved_count} files")).await; - debug!("Processed save task in the indexer, took: {save_duration:?}"); + debug!( + "Processed save task in the indexer ({}/{}), took: {save_duration:?}", + self.metadata.completed_tasks, self.metadata.total_tasks + ); } async fn process_update_output( @@ -495,7 +507,10 @@ impl Indexer { ctx.progress_msg(format!("Updated {updated_count} files")) .await; - debug!("Processed update task in the indexer, took: {update_duration:?}"); + debug!( + "Processed update task in the indexer ({}/{}), took: {update_duration:?}", + self.metadata.completed_tasks, self.metadata.total_tasks + ); } async fn process_handles( diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 370ea8f287d8..890b489bbf2a 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -1124,10 +1124,13 @@ pub(super) async fn recalculate_directories_size( ) .await?; - error!( - "Reverse calculating directory sizes finished with {} non-critical errors: {non_critical_errors:#?}", - non_critical_errors.len() - ); + if !non_critical_errors.is_empty() { + error!( + "Reverse calculating directory sizes finished with {} \ + non-critical errors: {non_critical_errors:#?}", + non_critical_errors.len() + ); + } should_invalidate = true; } else { diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index eff104b76db5..026e2d589c47 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -2,147 +2,147 @@ // This file was generated by [rspc](https://github.com/oscartbeaumont/rspc). Do not edit this file manually. 
export type Procedures = { - queries: - { key: "auth.me", input: never, result: { id: string; email: string } } | - { key: "backups.getAll", input: never, result: GetAll } | - { key: "buildInfo", input: never, result: BuildInfo } | - { key: "cloud.getApiOrigin", input: never, result: string } | - { key: "cloud.library.get", input: LibraryArgs, result: CloudLibrary | null } | - { key: "cloud.library.list", input: never, result: CloudLibrary[] } | - { key: "cloud.locations.list", input: never, result: CloudLocation[] } | - { key: "ephemeralFiles.getMediaData", input: string, result: MediaData | null } | - { key: "files.get", input: LibraryArgs, result: ObjectWithFilePaths2 | null } | - { key: "files.getConvertibleImageExtensions", input: never, result: string[] } | - { key: "files.getMediaData", input: LibraryArgs, result: MediaData } | - { key: "files.getPath", input: LibraryArgs, result: string | null } | - { key: "invalidation.test-invalidate", input: never, result: number } | - { key: "jobs.isActive", input: LibraryArgs, result: boolean } | - { key: "jobs.reports", input: LibraryArgs, result: JobGroup[] } | - { key: "labels.count", input: LibraryArgs, result: number } | - { key: "labels.get", input: LibraryArgs, result: Label | null } | - { key: "labels.getForObject", input: LibraryArgs, result: Label[] } | - { key: "labels.getWithObjects", input: LibraryArgs, result: { [key in number]: { date_created: string; object: { id: number } }[] } } | - { key: "labels.list", input: LibraryArgs, result: Label[] } | - { key: "labels.listWithThumbnails", input: LibraryArgs, result: ExplorerItem[] } | - { key: "library.kindStatistics", input: LibraryArgs, result: KindStatistics } | - { key: "library.list", input: never, result: LibraryConfigWrapped[] } | - { key: "library.statistics", input: LibraryArgs, result: StatisticsResponse } | - { key: "locations.get", input: LibraryArgs, result: Location | null } | - { key: "locations.getWithRules", input: LibraryArgs, result: LocationWithIndexerRule | null } | - { key: "locations.indexer_rules.get", input: LibraryArgs, result: IndexerRule } | - { key: "locations.indexer_rules.list", input: LibraryArgs, result: IndexerRule[] } | - { key: "locations.indexer_rules.listForLocation", input: LibraryArgs, result: IndexerRule[] } | - { key: "locations.list", input: LibraryArgs, result: Location[] } | - { key: "locations.systemLocations", input: never, result: SystemLocations } | - { key: "models.image_detection.list", input: never, result: string[] } | - { key: "nodeState", input: never, result: NodeState } | - { key: "nodes.listLocations", input: LibraryArgs, result: ExplorerItem[] } | - { key: "notifications.dismiss", input: NotificationId, result: null } | - { key: "notifications.dismissAll", input: never, result: null } | - { key: "notifications.get", input: never, result: Notification[] } | - { key: "p2p.listeners", input: never, result: Listeners } | - { key: "p2p.state", input: never, result: JsonValue } | - { key: "preferences.get", input: LibraryArgs, result: LibraryPreferences } | - { key: "search.objects", input: LibraryArgs, result: SearchData } | - { key: "search.objectsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | - { key: "search.paths", input: LibraryArgs, result: SearchData } | - { key: "search.pathsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | - { key: "search.saved.get", input: LibraryArgs, result: SavedSearch | null } | - { key: "search.saved.list", input: LibraryArgs, result: 
SavedSearch[] } | - { key: "sync.enabled", input: LibraryArgs, result: boolean } | - { key: "sync.messages", input: LibraryArgs, result: CRDTOperation[] } | - { key: "tags.get", input: LibraryArgs, result: Tag | null } | - { key: "tags.getForObject", input: LibraryArgs, result: Tag[] } | - { key: "tags.getWithObjects", input: LibraryArgs, result: { [key in number]: ({ object: { id: number }; date_created: string | null })[] } } | - { key: "tags.list", input: LibraryArgs, result: Tag[] } | + queries: + { key: "auth.me", input: never, result: { id: string; email: string } } | + { key: "backups.getAll", input: never, result: GetAll } | + { key: "buildInfo", input: never, result: BuildInfo } | + { key: "cloud.getApiOrigin", input: never, result: string } | + { key: "cloud.library.get", input: LibraryArgs, result: CloudLibrary | null } | + { key: "cloud.library.list", input: never, result: CloudLibrary[] } | + { key: "cloud.locations.list", input: never, result: CloudLocation[] } | + { key: "ephemeralFiles.getMediaData", input: string, result: MediaData | null } | + { key: "files.get", input: LibraryArgs, result: ObjectWithFilePaths2 | null } | + { key: "files.getConvertibleImageExtensions", input: never, result: string[] } | + { key: "files.getMediaData", input: LibraryArgs, result: MediaData } | + { key: "files.getPath", input: LibraryArgs, result: string | null } | + { key: "invalidation.test-invalidate", input: never, result: number } | + { key: "jobs.isActive", input: LibraryArgs, result: boolean } | + { key: "jobs.reports", input: LibraryArgs, result: JobGroup[] } | + { key: "labels.count", input: LibraryArgs, result: number } | + { key: "labels.get", input: LibraryArgs, result: Label | null } | + { key: "labels.getForObject", input: LibraryArgs, result: Label[] } | + { key: "labels.getWithObjects", input: LibraryArgs, result: { [key in number]: { date_created: string; object: { id: number } }[] } } | + { key: "labels.list", input: LibraryArgs, result: Label[] } | + { key: "labels.listWithThumbnails", input: LibraryArgs, result: ExplorerItem[] } | + { key: "library.kindStatistics", input: LibraryArgs, result: KindStatistics } | + { key: "library.list", input: never, result: LibraryConfigWrapped[] } | + { key: "library.statistics", input: LibraryArgs, result: StatisticsResponse } | + { key: "locations.get", input: LibraryArgs, result: Location | null } | + { key: "locations.getWithRules", input: LibraryArgs, result: LocationWithIndexerRule | null } | + { key: "locations.indexer_rules.get", input: LibraryArgs, result: IndexerRule } | + { key: "locations.indexer_rules.list", input: LibraryArgs, result: IndexerRule[] } | + { key: "locations.indexer_rules.listForLocation", input: LibraryArgs, result: IndexerRule[] } | + { key: "locations.list", input: LibraryArgs, result: Location[] } | + { key: "locations.systemLocations", input: never, result: SystemLocations } | + { key: "models.image_detection.list", input: never, result: string[] } | + { key: "nodeState", input: never, result: NodeState } | + { key: "nodes.listLocations", input: LibraryArgs, result: ExplorerItem[] } | + { key: "notifications.dismiss", input: NotificationId, result: null } | + { key: "notifications.dismissAll", input: never, result: null } | + { key: "notifications.get", input: never, result: Notification[] } | + { key: "p2p.listeners", input: never, result: Listeners } | + { key: "p2p.state", input: never, result: JsonValue } | + { key: "preferences.get", input: LibraryArgs, result: LibraryPreferences } | + { key: 
"search.objects", input: LibraryArgs, result: SearchData } | + { key: "search.objectsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | + { key: "search.paths", input: LibraryArgs, result: SearchData } | + { key: "search.pathsCount", input: LibraryArgs<{ filters?: SearchFilterArgs[] }>, result: number } | + { key: "search.saved.get", input: LibraryArgs, result: SavedSearch | null } | + { key: "search.saved.list", input: LibraryArgs, result: SavedSearch[] } | + { key: "sync.enabled", input: LibraryArgs, result: boolean } | + { key: "sync.messages", input: LibraryArgs, result: CRDTOperation[] } | + { key: "tags.get", input: LibraryArgs, result: Tag | null } | + { key: "tags.getForObject", input: LibraryArgs, result: Tag[] } | + { key: "tags.getWithObjects", input: LibraryArgs, result: { [key in number]: ({ object: { id: number }; date_created: string | null })[] } } | + { key: "tags.list", input: LibraryArgs, result: Tag[] } | { key: "volumes.list", input: never, result: Volume[] }, - mutations: - { key: "api.sendFeedback", input: Feedback, result: null } | - { key: "auth.logout", input: never, result: null } | - { key: "backups.backup", input: LibraryArgs, result: string } | - { key: "backups.delete", input: string, result: null } | - { key: "backups.restore", input: string, result: null } | - { key: "cloud.library.create", input: LibraryArgs, result: null } | - { key: "cloud.library.join", input: string, result: LibraryConfigWrapped } | - { key: "cloud.library.sync", input: LibraryArgs, result: null } | - { key: "cloud.locations.create", input: string, result: CloudLocation } | - { key: "cloud.locations.remove", input: string, result: CloudLocation } | - { key: "cloud.locations.testing", input: TestingParams, result: null } | - { key: "cloud.setApiOrigin", input: string, result: null } | - { key: "ephemeralFiles.copyFiles", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.createFile", input: LibraryArgs, result: string } | - { key: "ephemeralFiles.createFolder", input: LibraryArgs, result: string } | - { key: "ephemeralFiles.cutFiles", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.deleteFiles", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.moveToTrash", input: LibraryArgs, result: null } | - { key: "ephemeralFiles.renameFile", input: LibraryArgs, result: null } | - { key: "files.convertImage", input: LibraryArgs, result: null } | - { key: "files.copyFiles", input: LibraryArgs, result: null } | - { key: "files.createFile", input: LibraryArgs, result: string } | - { key: "files.createFolder", input: LibraryArgs, result: string } | - { key: "files.cutFiles", input: LibraryArgs, result: null } | - { key: "files.deleteFiles", input: LibraryArgs, result: null } | - { key: "files.eraseFiles", input: LibraryArgs, result: null } | - { key: "files.moveToTrash", input: LibraryArgs, result: null } | - { key: "files.removeAccessTime", input: LibraryArgs, result: null } | - { key: "files.renameFile", input: LibraryArgs, result: null } | - { key: "files.setFavorite", input: LibraryArgs, result: null } | - { key: "files.setNote", input: LibraryArgs, result: null } | - { key: "files.updateAccessTime", input: LibraryArgs, result: null } | - { key: "invalidation.test-invalidate-mutation", input: LibraryArgs, result: null } | - { key: "jobs.cancel", input: LibraryArgs, result: null } | - { key: "jobs.clear", input: LibraryArgs, result: null } | - { key: "jobs.clearAll", input: LibraryArgs, result: null } | - { key: 
"jobs.generateThumbsForLocation", input: LibraryArgs, result: string } | - { key: "jobs.identifyUniqueFiles", input: LibraryArgs, result: string } | - { key: "jobs.objectValidator", input: LibraryArgs, result: null } | - { key: "jobs.pause", input: LibraryArgs, result: null } | - { key: "jobs.resume", input: LibraryArgs, result: null } | - { key: "labels.delete", input: LibraryArgs, result: null } | - { key: "library.create", input: CreateLibraryArgs, result: LibraryConfigWrapped } | - { key: "library.delete", input: string, result: null } | - { key: "library.edit", input: EditLibraryArgs, result: null } | - { key: "library.startActor", input: LibraryArgs, result: null } | - { key: "library.stopActor", input: LibraryArgs, result: null } | - { key: "library.vaccumDb", input: LibraryArgs, result: null } | - { key: "locations.addLibrary", input: LibraryArgs, result: number | null } | - { key: "locations.create", input: LibraryArgs, result: number | null } | - { key: "locations.delete", input: LibraryArgs, result: null } | - { key: "locations.fullRescan", input: LibraryArgs, result: string | null } | - { key: "locations.indexer_rules.create", input: LibraryArgs, result: null } | - { key: "locations.indexer_rules.delete", input: LibraryArgs, result: null } | - { key: "locations.relink", input: LibraryArgs, result: number } | - { key: "locations.subPathRescan", input: LibraryArgs, result: string | null } | - { key: "locations.update", input: LibraryArgs, result: null } | - { key: "nodes.edit", input: ChangeNodeNameArgs, result: null } | - { key: "nodes.updateThumbnailerPreferences", input: UpdateThumbnailerPreferences, result: null } | - { key: "p2p.acceptSpacedrop", input: [string, string | null], result: null } | - { key: "p2p.cancelSpacedrop", input: string, result: null } | - { key: "p2p.debugConnect", input: RemoteIdentity, result: string } | - { key: "p2p.spacedrop", input: SpacedropArgs, result: string } | - { key: "preferences.update", input: LibraryArgs, result: null } | - { key: "search.saved.create", input: LibraryArgs<{ name: string; target?: SearchTarget; search?: string | null; filters?: string | null; description?: string | null; icon?: string | null }>, result: null } | - { key: "search.saved.delete", input: LibraryArgs, result: null } | - { key: "search.saved.update", input: LibraryArgs<[number, Args]>, result: null } | - { key: "sync.backfill", input: LibraryArgs, result: null } | - { key: "tags.assign", input: LibraryArgs<{ targets: Target[]; tag_id: number; unassign: boolean }>, result: null } | - { key: "tags.create", input: LibraryArgs, result: Tag } | - { key: "tags.delete", input: LibraryArgs, result: null } | - { key: "tags.update", input: LibraryArgs, result: null } | + mutations: + { key: "api.sendFeedback", input: Feedback, result: null } | + { key: "auth.logout", input: never, result: null } | + { key: "backups.backup", input: LibraryArgs, result: string } | + { key: "backups.delete", input: string, result: null } | + { key: "backups.restore", input: string, result: null } | + { key: "cloud.library.create", input: LibraryArgs, result: null } | + { key: "cloud.library.join", input: string, result: LibraryConfigWrapped } | + { key: "cloud.library.sync", input: LibraryArgs, result: null } | + { key: "cloud.locations.create", input: string, result: CloudLocation } | + { key: "cloud.locations.remove", input: string, result: CloudLocation } | + { key: "cloud.locations.testing", input: TestingParams, result: null } | + { key: "cloud.setApiOrigin", input: string, result: 
null } | + { key: "ephemeralFiles.copyFiles", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.createFile", input: LibraryArgs, result: string } | + { key: "ephemeralFiles.createFolder", input: LibraryArgs, result: string } | + { key: "ephemeralFiles.cutFiles", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.deleteFiles", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.moveToTrash", input: LibraryArgs, result: null } | + { key: "ephemeralFiles.renameFile", input: LibraryArgs, result: null } | + { key: "files.convertImage", input: LibraryArgs, result: null } | + { key: "files.copyFiles", input: LibraryArgs, result: null } | + { key: "files.createFile", input: LibraryArgs, result: string } | + { key: "files.createFolder", input: LibraryArgs, result: string } | + { key: "files.cutFiles", input: LibraryArgs, result: null } | + { key: "files.deleteFiles", input: LibraryArgs, result: null } | + { key: "files.eraseFiles", input: LibraryArgs, result: null } | + { key: "files.moveToTrash", input: LibraryArgs, result: null } | + { key: "files.removeAccessTime", input: LibraryArgs, result: null } | + { key: "files.renameFile", input: LibraryArgs, result: null } | + { key: "files.setFavorite", input: LibraryArgs, result: null } | + { key: "files.setNote", input: LibraryArgs, result: null } | + { key: "files.updateAccessTime", input: LibraryArgs, result: null } | + { key: "invalidation.test-invalidate-mutation", input: LibraryArgs, result: null } | + { key: "jobs.cancel", input: LibraryArgs, result: null } | + { key: "jobs.clear", input: LibraryArgs, result: null } | + { key: "jobs.clearAll", input: LibraryArgs, result: null } | + { key: "jobs.generateThumbsForLocation", input: LibraryArgs, result: string } | + { key: "jobs.identifyUniqueFiles", input: LibraryArgs, result: string } | + { key: "jobs.objectValidator", input: LibraryArgs, result: null } | + { key: "jobs.pause", input: LibraryArgs, result: null } | + { key: "jobs.resume", input: LibraryArgs, result: null } | + { key: "labels.delete", input: LibraryArgs, result: null } | + { key: "library.create", input: CreateLibraryArgs, result: LibraryConfigWrapped } | + { key: "library.delete", input: string, result: null } | + { key: "library.edit", input: EditLibraryArgs, result: null } | + { key: "library.startActor", input: LibraryArgs, result: null } | + { key: "library.stopActor", input: LibraryArgs, result: null } | + { key: "library.vaccumDb", input: LibraryArgs, result: null } | + { key: "locations.addLibrary", input: LibraryArgs, result: number | null } | + { key: "locations.create", input: LibraryArgs, result: number | null } | + { key: "locations.delete", input: LibraryArgs, result: null } | + { key: "locations.fullRescan", input: LibraryArgs, result: string | null } | + { key: "locations.indexer_rules.create", input: LibraryArgs, result: null } | + { key: "locations.indexer_rules.delete", input: LibraryArgs, result: null } | + { key: "locations.relink", input: LibraryArgs, result: number } | + { key: "locations.subPathRescan", input: LibraryArgs, result: string | null } | + { key: "locations.update", input: LibraryArgs, result: null } | + { key: "nodes.edit", input: ChangeNodeNameArgs, result: null } | + { key: "nodes.updateThumbnailerPreferences", input: UpdateThumbnailerPreferences, result: null } | + { key: "p2p.acceptSpacedrop", input: [string, string | null], result: null } | + { key: "p2p.cancelSpacedrop", input: string, result: null } | + { key: "p2p.debugConnect", input: RemoteIdentity, 
result: string } | + { key: "p2p.spacedrop", input: SpacedropArgs, result: string } | + { key: "preferences.update", input: LibraryArgs, result: null } | + { key: "search.saved.create", input: LibraryArgs<{ name: string; target?: SearchTarget; search?: string | null; filters?: string | null; description?: string | null; icon?: string | null }>, result: null } | + { key: "search.saved.delete", input: LibraryArgs, result: null } | + { key: "search.saved.update", input: LibraryArgs<[number, Args]>, result: null } | + { key: "sync.backfill", input: LibraryArgs, result: null } | + { key: "tags.assign", input: LibraryArgs<{ targets: Target[]; tag_id: number; unassign: boolean }>, result: null } | + { key: "tags.create", input: LibraryArgs, result: Tag } | + { key: "tags.delete", input: LibraryArgs, result: null } | + { key: "tags.update", input: LibraryArgs, result: null } | { key: "toggleFeatureFlag", input: BackendFeature, result: null }, - subscriptions: - { key: "auth.loginSession", input: never, result: Response } | - { key: "invalidation.listen", input: never, result: InvalidateOperationEvent[] } | - { key: "jobs.newFilePathIdentified", input: LibraryArgs, result: number[] } | - { key: "jobs.newThumbnail", input: LibraryArgs, result: ThumbKey } | - { key: "jobs.progress", input: LibraryArgs, result: JobProgressEvent } | - { key: "library.actors", input: LibraryArgs, result: { [key in string]: boolean } } | - { key: "locations.online", input: never, result: number[][] } | - { key: "locations.quickRescan", input: LibraryArgs, result: null } | - { key: "notifications.listen", input: never, result: Notification } | - { key: "p2p.events", input: never, result: P2PEvent } | - { key: "search.ephemeralPaths", input: LibraryArgs, result: EphemeralPathsResultItem } | - { key: "sync.active", input: LibraryArgs, result: SyncStatus } | + subscriptions: + { key: "auth.loginSession", input: never, result: Response } | + { key: "invalidation.listen", input: never, result: InvalidateOperationEvent[] } | + { key: "jobs.newFilePathIdentified", input: LibraryArgs, result: number[] } | + { key: "jobs.newThumbnail", input: LibraryArgs, result: ThumbKey } | + { key: "jobs.progress", input: LibraryArgs, result: JobProgressEvent } | + { key: "library.actors", input: LibraryArgs, result: { [key in string]: boolean } } | + { key: "locations.online", input: never, result: number[][] } | + { key: "locations.quickRescan", input: LibraryArgs, result: null } | + { key: "notifications.listen", input: never, result: Notification } | + { key: "p2p.events", input: never, result: P2PEvent } | + { key: "search.ephemeralPaths", input: LibraryArgs, result: EphemeralPathsResultItem } | + { key: "sync.active", input: LibraryArgs, result: SyncStatus } | { key: "sync.newMessage", input: LibraryArgs, result: null } }; @@ -152,7 +152,7 @@ export type AudioProps = { delay: number; padding: number; sample_rate: number | /** * All of the feature flags provided by the core itself. The frontend has it's own set of feature flags! 
- * + * * If you want a variant of this to show up on the frontend it must be added to `backendFeatures` in `useFeatureFlag.tsx` */ export type BackendFeature = "cloudSync" @@ -181,19 +181,19 @@ export type Codec = { kind: string | null; sub_kind: string | null; tag: string export type ColorProfile = "Normal" | "Custom" | "HDRNoOriginal" | "HDRWithOriginal" | "OriginalForHDR" | "Panorama" | "PortraitHDR" | "Portrait" -export type Composite = +export type Composite = /** * The data is present, but we're unable to determine what they mean */ -"Unknown" | +"Unknown" | /** * Not a composite image */ -"False" | +"False" | /** * A general composite image */ -"General" | +"General" | /** * The composite image was captured while shooting */ @@ -298,46 +298,46 @@ export type FilePathOrder = { field: "name"; value: SortOrder } | { field: "size export type FilePathSearchArgs = { take?: number | null; orderAndPagination?: OrderAndPagination | null; filters?: SearchFilterArgs[]; groupDirectories?: boolean } -export type Flash = { +export type Flash = { /** * Specifies how flash was used (on, auto, off, forced, onvalid) - * + * * [`FlashMode::Unknown`] isn't a valid EXIF state, but it's included as the default, * just in case we're unable to correctly match it to a known (valid) state. - * + * * This type should only ever be evaluated if flash EXIF data is present, so having this as a non-option shouldn't be an issue. */ -mode: FlashMode; +mode: FlashMode; /** * Did the flash actually fire? */ -fired: boolean | null; +fired: boolean | null; /** * Did flash return to the camera? (Unsure of the meaning) */ -returned: boolean | null; +returned: boolean | null; /** * Was red eye reduction used? */ red_eye_reduction: boolean | null } -export type FlashMode = +export type FlashMode = /** * The data is present, but we're unable to determine what they mean */ -"Unknown" | +"Unknown" | /** * `FLash` was on */ -"On" | +"On" | /** * Flash was off */ -"Off" | +"Off" | /** * Flash was set to automatically fire in certain conditions */ -"Auto" | +"Auto" | /** * Flash was forcefully fired */ @@ -362,10 +362,10 @@ export type IndexerRule = { id: number; pub_id: number[]; name: string | null; d /** * `IndexerRuleCreateArgs` is the argument received from the client using rspc to create a new indexer rule. * Note that `rules` field is a vector of tuples of `RuleKind` and `parameters`. - * + * * In case of `RuleKind::AcceptFilesByGlob` or `RuleKind::RejectFilesByGlob`, it will be a * vector of strings containing a glob patterns. - * + * * In case of `RuleKind::AcceptIfChildrenDirectoriesArePresent` or `RuleKind::RejectIfChildrenDirectoriesArePresent` the * `parameters` field must be a vector of strings containing the names of the directories. */ @@ -397,19 +397,19 @@ export type LibraryArgs = { library_id: string; arg: T } /** * LibraryConfig holds the configuration for a specific library. This is stored as a '{uuid}.sdlibrary' file. */ -export type LibraryConfig = { +export type LibraryConfig = { /** * name is the display name of the library. This is used in the UI and is set by the user. */ -name: LibraryName; +name: LibraryName; /** * description is a user set description of the library. This is used in the UI and is set by the user. */ -description: string | null; +description: string | null; /** * id of the current instance so we know who this `.db` is. This can be looked up within the `Instance` table. 
*/ -instance_id: number; +instance_id: number; /** * cloud_id is the ID of the cloud library this library is linked to. * If this is set we can assume the library is synced with the Cloud. @@ -445,7 +445,7 @@ export type LocationSettings = { explorer: ExplorerSettings } * `LocationUpdateArgs` is the argument received from the client using `rspc` to update a location. * It contains the id of the location to be updated, possible a name to change the current location's name * and a vector of indexer rules ids to add or remove from the location. - * + * * It is important to note that only the indexer rule ids in this vector will be used from now on. * Old rules that aren't in this vector will be purged. */ @@ -471,11 +471,11 @@ export type NodeConfigP2P = { discovery?: P2PDiscoveryState; port: Port; ipv4: b export type NodePreferences = Record -export type NodeState = ({ +export type NodeState = ({ /** * id is a unique identifier for the current node. Each node has a public identifier (this one) and is given a local id for each library (done within the library code). */ -id: string; +id: string; /** * name is the display name of the current node. This is set by the user and is shown in the UI. // TODO: Length validation so it can fit in DNS record */ @@ -600,7 +600,7 @@ export type SetFavoriteArgs = { id: number; favorite: boolean } export type SetNoteArgs = { id: number; note: string | null } -export type SingleInvalidateOperationEvent = { +export type SingleInvalidateOperationEvent = { /** * This fields are intentionally private. */ From 1aa2f4b7920f1c2ef88603331b549b7567d4bec9 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Mon, 20 May 2024 16:09:06 -0300 Subject: [PATCH 16/33] bruh --- core/crates/heavy-lifting/src/media_processor/job.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 0a217b349e52..2b9e65c1afc1 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -853,6 +853,10 @@ async fn dispatch_thumbnailer_tasks( ) .await?; + if file_paths.is_empty() { + return Ok((0, Vec::new())); + } + let thumbs_count = file_paths.len() as u64; let first_materialized_path = file_paths[0].materialized_path.clone(); From 1ce3308c01a4b8e9a7f58eaca950c2bb1587f059 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 21 May 2024 02:55:43 -0300 Subject: [PATCH 17/33] Internal deadlocks and excess of communication in the task system - Also better logs --- crates/task-system/src/message.rs | 7 - crates/task-system/src/system.rs | 347 +++++---- crates/task-system/src/task.rs | 126 +++- crates/task-system/src/worker/mod.rs | 58 +- crates/task-system/src/worker/run.rs | 64 +- crates/task-system/src/worker/runner.rs | 898 +++++++++--------------- 6 files changed, 610 insertions(+), 890 deletions(-) diff --git a/crates/task-system/src/message.rs b/crates/task-system/src/message.rs index 964131db76b4..ddb67e57f1a3 100644 --- a/crates/task-system/src/message.rs +++ b/crates/task-system/src/message.rs @@ -31,17 +31,11 @@ pub enum SystemMessage { worker_id: WorkerId, ack: oneshot::Sender>, }, - NotifyIdleWorkers { - start_from: WorkerId, - task_count: usize, - }, ShutdownRequest(oneshot::Sender>), } -#[derive(Debug)] pub enum WorkerMessage { NewTask(TaskWorkState), - TaskCountRequest(oneshot::Sender), ResumeTask { task_id: TaskId, ack: oneshot::Sender>, @@ -63,7 +57,6 @@ pub enum WorkerMessage { ack: oneshot::Sender, 
stolen_task_tx: chan::Sender>>, }, - WakeUp, } pub struct TaskRunnerOutput { diff --git a/crates/task-system/src/system.rs b/crates/task-system/src/system.rs index 7fea0a030692..c0629c45f714 100644 --- a/crates/task-system/src/system.rs +++ b/crates/task-system/src/system.rs @@ -15,7 +15,7 @@ use async_channel as chan; use futures::StreamExt; use futures_concurrency::future::Join; use tokio::{spawn, sync::oneshot, task::JoinHandle}; -use tracing::{error, info, trace, warn}; +use tracing::{error, info, instrument, trace, warn}; use super::{ error::{RunError, SystemError}, @@ -95,7 +95,7 @@ impl System { } }); - info!("Task system online with {workers_count} workers!"); + info!(%workers_count, "Task system online!"); Self { workers: Arc::clone(&workers), @@ -146,14 +146,12 @@ impl System { while let Some(msg) = msg_stream.next().await { match msg { SystemMessage::IdleReport(worker_id) => { - trace!("Task system received a worker idle report request: "); + trace!(%worker_id, "Task system received a worker idle report request"); idle_workers[worker_id].store(true, Ordering::Relaxed); } SystemMessage::WorkingReport(worker_id) => { - trace!( - "Task system received a working report request: " - ); + trace!(%worker_id, "Task system received a working report request"); idle_workers[worker_id].store(false, Ordering::Relaxed); } @@ -161,109 +159,25 @@ impl System { task_id, worker_id, ack, - } => { - trace!("Task system received a task resume request: "); - spawn({ - let workers = Arc::clone(&workers); - async move { - workers[worker_id].resume_task(task_id, ack).await; - } - }); - trace!( - "Task system resumed task: " - ); - } + } => dispatch_resume_request(&workers, task_id, worker_id, ack), SystemMessage::PauseNotRunningTask { task_id, worker_id, ack, - } => { - trace!("Task system received a task pause request: "); - spawn({ - let workers = Arc::clone(&workers); - async move { - workers[worker_id] - .pause_not_running_task(task_id, ack) - .await; - } - }); - trace!( - "Task system paused task: " - ); - } + } => dispatch_pause_not_running_task_request(&workers, task_id, worker_id, ack), SystemMessage::CancelNotRunningTask { task_id, worker_id, ack, - } => { - trace!("Task system received a task cancel request: "); - spawn({ - let workers = Arc::clone(&workers); - - async move { - workers[worker_id] - .cancel_not_running_task(task_id, ack) - .await; - } - }); - trace!( - "Task system canceled task: " - ); - } + } => dispatch_cancel_not_running_task_request(&workers, task_id, worker_id, ack), SystemMessage::ForceAbortion { task_id, worker_id, ack, - } => { - trace!( - "Task system received a task force abortion request: \ - " - ); - spawn({ - let workers = Arc::clone(&workers); - - async move { - workers[worker_id].force_task_abortion(task_id, ack).await; - } - }); - trace!( - "Task system aborted task: " - ); - } - - SystemMessage::NotifyIdleWorkers { - start_from, - task_count, - } => { - trace!( - "Task system received a request to notify idle workers: \ - " - ); - - spawn({ - let workers = Arc::clone(&workers); - let idle_workers = Arc::clone(&idle_workers); - - async move { - for idx in (0..workers.len()) - .cycle() - .skip(start_from) - .take(usize::min(task_count, workers.len())) - { - if idle_workers[idx].load(Ordering::Relaxed) { - trace!("Task system sending wake up request: "); - workers[idx].wake().await; - trace!("Task system waked up worker: "); - // we don't mark the worker as not idle because we wait for it to - // successfully steal a task and then report it back as active - } - } 
- } - }); - } + } => dispatch_force_abortion_task_request(&workers, task_id, worker_id, ack), SystemMessage::ShutdownRequest(tx) => { trace!("Task system received a shutdown request"); @@ -310,7 +224,7 @@ impl System { } if let Err(e) = handle.await { - error!("Task system failed to shutdown on handle await: {e:#?}"); + error!(?e, "Task system failed to shutdown on handle await"); } } else { warn!("Trying to shutdown the tasks system that was already shutdown"); @@ -318,6 +232,78 @@ impl System { } } +#[instrument(skip(workers, ack))] +fn dispatch_resume_request( + workers: &Arc>>, + task_id: TaskId, + worker_id: WorkerId, + ack: oneshot::Sender>, +) { + trace!("Task system received a task resume request"); + spawn({ + let workers = Arc::clone(workers); + async move { + workers[worker_id].resume_task(task_id, ack).await; + } + }); + trace!("Task system resumed task"); +} + +#[instrument(skip(workers, ack))] +fn dispatch_pause_not_running_task_request( + workers: &Arc>>, + task_id: TaskId, + worker_id: WorkerId, + ack: oneshot::Sender>, +) { + trace!("Task system received a task pause request"); + spawn({ + let workers = Arc::clone(workers); + async move { + workers[worker_id] + .pause_not_running_task(task_id, ack) + .await; + } + }); + trace!("Task system paused task"); +} + +#[instrument(skip(workers, ack))] +fn dispatch_cancel_not_running_task_request( + workers: &Arc>>, + task_id: TaskId, + worker_id: WorkerId, + ack: oneshot::Sender<()>, +) { + trace!("Task system received a task cancel request"); + spawn({ + let workers = Arc::clone(workers); + async move { + workers[worker_id] + .cancel_not_running_task(task_id, ack) + .await; + } + }); + trace!("Task system canceled task"); +} + +#[instrument(skip(workers, ack))] +fn dispatch_force_abortion_task_request( + workers: &Arc>>, + task_id: TaskId, + worker_id: WorkerId, + ack: oneshot::Sender>, +) { + trace!("Task system received a task force abortion request"); + spawn({ + let workers = Arc::clone(workers); + async move { + workers[worker_id].force_task_abortion(task_id, ack).await; + } + }); + trace!("Task system aborted task"); +} + /// The default implementation of the task system will create a system with a number of workers equal to the available /// parallelism in the user's machine. 
impl Default for System { @@ -335,104 +321,103 @@ unsafe impl Sync for System {} pub struct SystemComm(chan::Sender); impl SystemComm { - pub async fn idle_report(&self, worker_id: usize) { - self.0 - .send(SystemMessage::IdleReport(worker_id)) - .await - .expect("System channel closed trying to report idle"); + pub fn idle_report(&self, worker_id: usize) { + let system_tx = self.0.clone(); + spawn(async move { + system_tx + .send(SystemMessage::IdleReport(worker_id)) + .await + .expect("System channel closed trying to report idle"); + }); } - pub async fn working_report(&self, worker_id: usize) { - self.0 - .send(SystemMessage::WorkingReport(worker_id)) - .await - .expect("System channel closed trying to report working"); + pub fn working_report(&self, worker_id: usize) { + let system_tx = self.0.clone(); + spawn(async move { + system_tx + .send(SystemMessage::WorkingReport(worker_id)) + .await + .expect("System channel closed trying to report working"); + }); } - pub async fn pause_not_running_task( + pub fn pause_not_running_task( &self, task_id: TaskId, worker_id: WorkerId, - ) -> Result<(), SystemError> { - let (tx, rx) = oneshot::channel(); - - self.0 - .send(SystemMessage::PauseNotRunningTask { - task_id, - worker_id, - ack: tx, - }) - .await - .expect("System channel closed trying to pause not running task"); - - rx.await - .expect("System channel closed trying receive pause not running task response") + res_tx: oneshot::Sender>, + ) { + let system_tx = self.0.clone(); + spawn(async move { + system_tx + .send(SystemMessage::PauseNotRunningTask { + task_id, + worker_id, + ack: res_tx, + }) + .await + .expect("System channel closed trying to pause not running task"); + }); } - pub async fn cancel_not_running_task(&self, task_id: TaskId, worker_id: WorkerId) { - let (tx, rx) = oneshot::channel(); - - self.0 - .send(SystemMessage::CancelNotRunningTask { - task_id, - worker_id, - ack: tx, - }) - .await - .expect("System channel closed trying to cancel a not running task"); - - rx.await - .expect("System channel closed trying receive cancel a not running task response"); - } + pub fn cancel_not_running_task( + &self, + task_id: TaskId, + worker_id: WorkerId, + res_tx: oneshot::Sender<()>, + ) { + let system_tx = self.0.clone(); - pub async fn request_help(&self, worker_id: WorkerId, task_count: usize) { - self.0 - .send(SystemMessage::NotifyIdleWorkers { - start_from: worker_id, - task_count, - }) - .await - .expect("System channel closed trying to request help"); + spawn(async move { + system_tx + .send(SystemMessage::CancelNotRunningTask { + task_id, + worker_id, + ack: res_tx, + }) + .await + .expect("System channel closed trying to cancel a not running task"); + }); } - pub async fn resume_task( + pub fn resume_task( &self, task_id: TaskId, worker_id: WorkerId, - ) -> Result<(), SystemError> { - let (tx, rx) = oneshot::channel(); - - self.0 - .send(SystemMessage::ResumeTask { - task_id, - worker_id, - ack: tx, - }) - .await - .expect("System channel closed trying to resume task"); + res_tx: oneshot::Sender>, + ) { + let system_tx = self.0.clone(); - rx.await - .expect("System channel closed trying receive resume task response") + spawn(async move { + system_tx + .send(SystemMessage::ResumeTask { + task_id, + worker_id, + ack: res_tx, + }) + .await + .expect("System channel closed trying to resume task"); + }); } - pub async fn force_abortion( + pub fn force_abortion( &self, task_id: TaskId, worker_id: WorkerId, - ) -> Result<(), SystemError> { - let (tx, rx) = oneshot::channel(); - - 
self.0
-			.send(SystemMessage::ForceAbortion {
-				task_id,
-				worker_id,
-				ack: tx,
-			})
-			.await
-			.expect("System channel closed trying to resume task");
+		res_tx: oneshot::Sender<Result<(), SystemError>>,
+	) {
+		let system_tx = self.0.clone();
 
-		rx.await
-			.expect("System channel closed trying receive resume task response")
+		spawn(async move {
+			system_tx
+				.send(SystemMessage::ForceAbortion {
+					task_id,
+					worker_id,
+					ack: res_tx,
+				})
+				.await
+				.expect("System channel closed trying to resume task");
+		});
 	}
 }
 
@@ -503,10 +488,8 @@ impl<E: RunError> Dispatcher<E> for BaseDispatcher<E> {
 			})
 			.expect("we hardcoded the update function to always return Some(next_worker_id) through dispatcher");
 
-		trace!(
-			"Dispatching task to worker: ",
-			task.id()
-		);
+		trace!(%worker_id, task_id = %task.id(), "Dispatching task to worker");
+
 		let handle = self.workers[worker_id].add_task(task).await;
 
 		self.idle_workers[worker_id].store(false, Ordering::Relaxed);
@@ -518,20 +501,10 @@ impl<E: RunError> Dispatcher<E> for BaseDispatcher<E> {
 		&self,
 		into_tasks: impl IntoIterator<Item = Box<dyn Task<E>>> + Send,
 	) -> Vec<TaskHandle<E>> {
-		let mut workers_task_count = self
-			.workers
-			.iter()
-			.map(|worker| async move { (worker.id, worker.task_count().await) })
-			.collect::<Vec<_>>()
-			.join()
-			.await;
-
-		workers_task_count.sort_by_key(|(_id, count)| *count);
-
 		let (handles, workers_ids_set) = into_tasks
 			.into_iter()
-			.zip(workers_task_count.into_iter().cycle())
-			.map(|(task, (worker_id, _))| async move {
+			.zip((0..self.workers.len()).cycle())
+			.map(|(task, worker_id)| async move {
 				(self.workers[worker_id].add_task(task).await, worker_id)
 			})
 			.collect::<Vec<_>>()
diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs
index 10814cc4292c..41dbeb45a81f 100644
--- a/crates/task-system/src/task.rs
+++ b/crates/task-system/src/task.rs
@@ -31,12 +31,18 @@ pub type TaskId = Uuid;
 /// The user will downcast it to the concrete type that the task returns. Most of the time,
 /// tasks will not return anything, so it isn't a costly abstraction, as only a heap allocation
 /// is needed when the user wants to return a [`Box<dyn AnyTaskOutput>`].
-pub trait AnyTaskOutput: Send + fmt::Debug + Downcast + 'static {}
+pub trait AnyTaskOutput: Send + Downcast + 'static {}
+
+impl fmt::Debug for Box<dyn AnyTaskOutput> {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		write!(f, "<AnyTaskOutput>")
+	}
+}
 
 impl_downcast!(AnyTaskOutput);
 
-/// Blanket implementation for all types that implements `std::fmt::Debug + Send + 'static`
-impl<T: fmt::Debug + Send + 'static> AnyTaskOutput for T {}
+/// Blanket implementation for all types that implement `Send + 'static`
+impl<T: Send + 'static> AnyTaskOutput for T {}
 
 /// A helper trait to convert any type that implements [`AnyTaskOutput`] into a [`TaskOutput`], boxing it.
 pub trait IntoAnyTaskOutput {
@@ -129,7 +135,7 @@ impl<T: Task<E> + 'static, E: RunError> IntoTask<E> for T {
 /// We're currently using the [`async_trait`](https://docs.rs/async-trait) crate to allow dyn async traits,
 /// due to a limitation in the Rust language.
 #[async_trait]
-pub trait Task<E: RunError>: fmt::Debug + Downcast + Send + Sync + 'static {
+pub trait Task<E: RunError>: Downcast + Send + Sync + 'static {
 	/// A unique identifier for the task; it will be used to identify the task on the system and also to the user.
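	// Editor's note, a minimal usage sketch (not part of this patch): the `SystemComm`
	// methods above became fire-and-forget. The caller supplies the `oneshot` ack sender
	// and the channel send happens inside a spawned task, so neither the controller nor
	// the system message loop blocks on the other. `TaskRemoteController` below uses it as:
	//
	//	let (tx, rx) = oneshot::channel();
	//	system_comm.resume_task(task_id, worker_id, tx); // returns immediately
	//	rx.await.expect("Worker failed to ack resume request")
	//
	// The batch dispatch above also dropped the per-worker task-count query and now assigns
	// tasks round-robin: with 3 workers, tasks 0..=5 land on workers 0, 1, 2, 0, 1, 2.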
fn id(&self) -> TaskId; @@ -160,6 +166,12 @@ pub trait Task: fmt::Debug + Downcast + Send + Sync + 'static { impl_downcast!(Task where E: RunError); +impl fmt::Debug for Box> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "") + } +} + pub trait SerializableTask: Task where Self: Sized, @@ -397,13 +409,18 @@ impl TaskRemoteController { } else { trace!("Task is not running, setting is_paused flag"); self.worktable.is_paused.store(true, Ordering::Relaxed); - return self - .system_comm - .pause_not_running_task( - self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), - ) - .await; + + let (tx, rx) = oneshot::channel(); + + self.system_comm.pause_not_running_task( + self.task_id, + self.worktable.current_worker_id.load(Ordering::Relaxed), + tx, + ); + + return rx + .await + .expect("Worker failed to ack pause not running task request"); } } @@ -436,36 +453,53 @@ impl TaskRemoteController { } else { trace!("Task is not running, setting is_canceled flag"); self.worktable.has_canceled.store(true, Ordering::Relaxed); - self.system_comm - .cancel_not_running_task( - self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), - ) - .await; + + let (tx, rx) = oneshot::channel(); + + self.system_comm.cancel_not_running_task( + self.task_id, + self.worktable.current_worker_id.load(Ordering::Relaxed), + tx, + ); + + rx.await + .expect("Worker failed to ack cancel not running task request"); } } } /// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution. + /// # Panics + /// Will panic if the worker failed to ack the forced abortion request pub async fn force_abortion(&self) -> Result<(), SystemError> { self.worktable.set_aborted(); - self.system_comm - .force_abortion( - self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), - ) - .await + + let (tx, rx) = oneshot::channel(); + + self.system_comm.force_abortion( + self.task_id, + self.worktable.current_worker_id.load(Ordering::Relaxed), + tx, + ); + + rx.await + .expect("Worker failed to ack force abortion request") } /// Marks the task to be resumed by the task system, the worker will start processing it if there is a slot /// available or will be enqueued otherwise. 
+ /// # Panics + /// Will panic if the worker failed to ack the resume request pub async fn resume(&self) -> Result<(), SystemError> { - self.system_comm - .resume_task( - self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), - ) - .await + let (tx, rx) = oneshot::channel(); + + self.system_comm.resume_task( + self.task_id, + self.worktable.current_worker_id.load(Ordering::Relaxed), + tx, + ); + + rx.await.expect("Worker failed to ack resume request") } /// Verify if the task was already completed @@ -712,7 +746,23 @@ impl TaskWorktable { } } -#[derive(Debug)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PendingTaskKind { + Normal, + Priority, + Suspended, +} + +impl PendingTaskKind { + const fn with_priority(has_priority: bool) -> Self { + if has_priority { + Self::Priority + } else { + Self::Normal + } + } +} + pub struct TaskWorkState { pub(crate) task: Box>, pub(crate) worktable: Arc, @@ -722,10 +772,19 @@ pub struct TaskWorkState { impl TaskWorkState { #[inline] - pub fn task_id(&self) -> TaskId { + pub fn id(&self) -> TaskId { self.task.id() } + #[inline] + pub fn kind(&self) -> PendingTaskKind { + PendingTaskKind::with_priority(self.task.with_priority()) + } + + pub fn worker_id(&self) -> WorkerId { + self.worktable.current_worker_id.load(Ordering::Relaxed) + } + #[inline] pub fn change_worker(&self, new_worker_id: WorkerId) { self.worktable @@ -768,9 +827,8 @@ impl Drop for PanicOnSenderDrop { self.maybe_done_tx.is_none(), "TaskHandle done channel dropped before sending a result" ); - trace!( - "TaskWorkState of task successfully dropped", - self.task_id + trace!(task_id = %self.task_id, + "TaskWorkState successfully dropped" ); } } diff --git a/crates/task-system/src/worker/mod.rs b/crates/task-system/src/worker/mod.rs index b238c5b0e206..2891fdeb0f99 100644 --- a/crates/task-system/src/worker/mod.rs +++ b/crates/task-system/src/worker/mod.rs @@ -6,7 +6,7 @@ use std::{ use async_channel as chan; use tokio::{spawn, sync::oneshot, task::JoinHandle}; -use tracing::{error, info, trace, warn}; +use tracing::{error, info, instrument, trace, warn}; use super::{ error::{RunError, SystemError}, @@ -53,6 +53,7 @@ impl WorkerBuilder { ) } + #[instrument(skip(self, system_comm, task_stealer), fields(worker_id = self.id))] pub fn build(self, system_comm: SystemComm, task_stealer: WorkStealer) -> Worker { let Self { id, @@ -64,7 +65,7 @@ impl WorkerBuilder { let system_comm = system_comm.clone(); async move { - trace!("Worker message processing task starting..."); + trace!("Worker message processing task starting..."); while let Err(e) = spawn(run( id, system_comm.clone(), @@ -75,18 +76,16 @@ impl WorkerBuilder { { if e.is_panic() { error!( - "Worker critically failed and will restart: \ + "Worker critically failed and will restart: \ {e:#?}" ); } else { - trace!( - "Worker received shutdown signal and will exit..." 
- ); + trace!("Worker received shutdown signal and will exit..."); break; } } - info!("Worker gracefully shutdown"); + info!("Worker gracefully shutdown"); } }); @@ -137,18 +136,6 @@ impl Worker { } } - pub async fn task_count(&self) -> usize { - let (tx, rx) = oneshot::channel(); - - self.msgs_tx - .send(WorkerMessage::TaskCountRequest(tx)) - .await - .expect("Worker channel closed trying to get task count"); - - rx.await - .expect("Worker channel closed trying to receive task count response") - } - pub async fn resume_task( &self, task_id: TaskId, @@ -189,6 +176,7 @@ impl Worker { .expect("Worker channel closed trying to force task abortion"); } + #[instrument(skip(self), fields(worker_id = self.id))] pub async fn shutdown(&self) { if let Some(handle) = self .handle @@ -214,13 +202,6 @@ impl Worker { warn!("Trying to shutdown a worker that was already shutdown"); } } - - pub async fn wake(&self) { - self.msgs_tx - .send(WorkerMessage::WakeUp) - .await - .expect("Worker channel closed trying to wake up"); - } } /// SAFETY: Due to usage of refcell we lost `Sync` impl, but we only use it to have a shutdown method @@ -272,9 +253,10 @@ impl WorkStealer { } } + #[instrument(skip(self, stolen_task_tx))] pub async fn steal( &self, - worker_id: WorkerId, + stealer_worker_id: WorkerId, stolen_task_tx: &chan::Sender>>, ) { let total_workers = self.worker_comms.len(); @@ -285,28 +267,22 @@ impl WorkStealer { // Cycling over the workers .cycle() // Starting from the next worker id - .skip(worker_id) + .skip(stealer_worker_id) // Taking the total amount of workers .take(total_workers) // Removing the current worker as we can't steal from ourselves - .filter(|worker_comm| worker_comm.worker_id != worker_id) + .filter(|worker_comm| worker_comm.worker_id != stealer_worker_id) { - trace!( - "Trying to steal from worker ", - worker_comm.worker_id - ); + trace!(stolen_worker_id = worker_comm.worker_id, "Trying to steal",); if worker_comm.steal_task(stolen_task_tx.clone()).await { - trace!( - "Worker successfully stole a task", - worker_comm.worker_id - ); + trace!(stolen_worker_id = worker_comm.worker_id, "Stole a task"); return; } trace!( - "Worker has no tasks to steal", - worker_comm.worker_id + stolen_worker_id = worker_comm.worker_id, + "No tasks to steal" ); } @@ -316,8 +292,4 @@ impl WorkStealer { .await .expect("Stolen task channel closed"); } - - pub fn workers_count(&self) -> usize { - self.worker_comms.len() - } } diff --git a/crates/task-system/src/worker/run.rs b/crates/task-system/src/worker/run.rs index 531c3c0a0c71..bff430e1d081 100644 --- a/crates/task-system/src/worker/run.rs +++ b/crates/task-system/src/worker/run.rs @@ -5,7 +5,7 @@ use futures::StreamExt; use futures_concurrency::stream::Merge; use tokio::time::{interval_at, Instant}; use tokio_stream::wrappers::IntervalStream; -use tracing::{debug, error, trace, warn}; +use tracing::{debug, error, instrument, trace, warn}; use super::{ super::{ @@ -24,13 +24,15 @@ enum StreamMessage { IdleCheck, } +#[instrument(skip(system_comm, work_stealer, msgs_rx))] pub(super) async fn run( - id: WorkerId, + worker_id: WorkerId, system_comm: SystemComm, work_stealer: WorkStealer, msgs_rx: chan::Receiver>, ) { - let (mut runner, stole_task_rx, task_output_rx) = Runner::new(id, work_stealer, system_comm); + let (mut runner, stole_task_rx, task_output_rx) = + Runner::new(worker_id, work_stealer, system_comm); let mut idle_checker_interval = interval_at(Instant::now(), ONE_SECOND); 
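	// Editor's note, an illustrative sketch (not a change in this patch) of how the pieces
	// in this function fit together: each event source is mapped into `StreamMessage` and
	// merged with `futures_concurrency`, so a single loop services worker commands, task
	// outputs, stolen tasks and the idle tick. Roughly:
	//
	//	let mut msg_stream = (
	//		msgs_rx.map(StreamMessage::Commands),
	//		stole_task_rx.map(StreamMessage::Steal),
	//		task_output_rx.map(StreamMessage::TaskOutput),
	//		IntervalStream::new(idle_checker_interval).map(|_| StreamMessage::IdleCheck),
	//	)
	//		.merge();
	//
	//	while let Some(msg) = msg_stream.next().await { /* match on StreamMessage */ }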
idle_checker_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); @@ -47,31 +49,26 @@ pub(super) async fn run( match msg { // Worker messages StreamMessage::Commands(WorkerMessage::NewTask(task_work_state)) => { - let task_id = task_work_state.task_id(); + let task_id = task_work_state.id(); runner.abort_steal_task(); - trace!("New task received: "); - runner.new_task(task_work_state).await; - trace!("New task added: "); - } - - StreamMessage::Commands(WorkerMessage::TaskCountRequest(tx)) => { - if tx.send(runner.total_tasks()).is_err() { - warn!("Task count request channel closed before sending task count"); - } + trace!(%task_id, "New task received"); + runner.new_task(task_id, task_work_state.kind(), task_work_state); + trace!(%task_id, "New task added"); } StreamMessage::Commands(WorkerMessage::ResumeTask { task_id, ack }) => { - trace!("Resume task request received: "); - if ack.send(runner.resume_task(task_id).await).is_err() { + trace!(%task_id, "Resume task request received"); + if ack.send(runner.resume_task(task_id)).is_err() { warn!("Resume task channel closed before sending ack"); } - trace!("Resumed task: "); + trace!(%task_id, "Resumed task"); } StreamMessage::Commands(WorkerMessage::PauseNotRunningTask { task_id, ack }) => { if ack.send(runner.pause_not_running_task(task_id)).is_err() { warn!("Resume task channel closed before sending ack"); } + trace!(%task_id, "Paused not running task response sent"); } StreamMessage::Commands(WorkerMessage::CancelNotRunningTask { task_id, ack }) => { @@ -79,19 +76,18 @@ pub(super) async fn run( if ack.send(()).is_err() { warn!("Resume task channel closed before sending ack"); } + trace!(%task_id, "Cancel not running task response sent"); } StreamMessage::Commands(WorkerMessage::ForceAbortion { task_id, ack }) => { - trace!( - "Force abortion task request received: " - ); + trace!(%task_id, "Force abortion task request received"); if ack .send(runner.force_task_abortion(&task_id).await) .is_err() { warn!("Force abortion channel closed before sending ack"); } - trace!("Force aborted task: "); + trace!(%task_id, "Force aborted task response sent"); } StreamMessage::Commands(WorkerMessage::ShutdownRequest(tx)) => { @@ -102,43 +98,39 @@ pub(super) async fn run( ack, stolen_task_tx, }) => { - trace!("Steal task request received: "); + trace!("Steal task request received"); if ack .send(runner.steal_request(stolen_task_tx).await) .is_err() { debug!("Steal request attempt aborted before sending ack"); } - trace!("Steal task request completed: "); + trace!("Steal task request completed"); } - StreamMessage::Commands(WorkerMessage::WakeUp) => runner.wake_up(), - // Runner messages StreamMessage::TaskOutput(TaskOutputMessage(task_id, Ok(output))) => { - trace!( - "Process task output request received: " - ); + trace!(%task_id, "Process task output request received"); runner.process_task_output(&task_id, output).await; - trace!("Processed task output: "); + trace!(%task_id, "Processed task output"); } StreamMessage::TaskOutput(TaskOutputMessage(task_id, Err(()))) => { - error!("Task failed "); + error!(%task_id, "Task failed"); runner.clear_errored_task(task_id).await; - trace!("Failed task cleared: "); + trace!(%task_id, "Failed task cleared"); } StreamMessage::Steal(maybe_stolen_task) => { - let maybe_task_id = maybe_stolen_task - .as_ref() - .map(|StoleTaskMessage(task_work_state)| task_work_state.task_id()); - trace!("Received stolen task request: "); - runner.process_stolen_task(maybe_stolen_task).await; trace!( - "Processed 
stolen task: " + maybe_task_id = ?maybe_stolen_task + .as_ref() + .map(|StoleTaskMessage(task_work_state)| task_work_state.id()), + "Received stolen task request" ); + runner.process_stolen_task(maybe_stolen_task).await; + trace!("Processed stolen task"); } // Idle checking to steal some work diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index 7175523d83e7..c471985da662 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -18,7 +18,7 @@ use tokio::{ task::{JoinError, JoinHandle}, time::{sleep, timeout, Instant}, }; -use tracing::{debug, error, trace, warn}; +use tracing::{debug, error, instrument, trace, warn}; use super::{ super::{ @@ -26,8 +26,8 @@ use super::{ message::{StoleTaskMessage, TaskOutputMessage}, system::SystemComm, task::{ - ExecStatus, InternalTaskExecStatus, Interrupter, PanicOnSenderDrop, Task, TaskId, - TaskOutput, TaskStatus, TaskWorkState, TaskWorktable, + ExecStatus, InternalTaskExecStatus, Interrupter, PanicOnSenderDrop, PendingTaskKind, + Task, TaskId, TaskOutput, TaskStatus, TaskWorkState, TaskWorktable, }, }, TaskRunnerOutput, WorkStealer, WorkerId, ONE_SECOND, @@ -50,37 +50,12 @@ struct AbortAndSuspendSignalers { suspend_tx: oneshot::Sender<()>, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(super) enum PendingTaskKind { - Normal, - Priority, - Suspended, -} - -impl PendingTaskKind { - const fn with_priority(has_priority: bool) -> Self { - if has_priority { - Self::Priority - } else { - Self::Normal - } - } -} - struct RunningTask { - task_id: TaskId, - task_kind: PendingTaskKind, + id: TaskId, + kind: PendingTaskKind, handle: JoinHandle<()>, } -fn dispatch_steal_request( - worker_id: WorkerId, - work_stealer: WorkStealer, - stole_task_tx: chan::Sender>>, -) -> JoinHandle<()> { - spawn(async move { work_stealer.steal(worker_id, &stole_task_tx).await }) -} - enum WaitingSuspendedTask { Task(TaskId), None, @@ -101,7 +76,6 @@ pub(super) struct Runner { paused_tasks: HashMap>, suspended_task: Option>, priority_tasks: VecDeque>, - last_requested_help: Instant, is_idle: bool, waiting_suspension: WaitingSuspendedTask, abort_and_suspend_map: HashMap, @@ -140,7 +114,6 @@ impl Runner { paused_tasks: HashMap::new(), suspended_task: None, priority_tasks: VecDeque::with_capacity(PRIORITY_TASK_QUEUE_INITIAL_SIZE), - last_requested_help: Instant::now(), is_idle: true, waiting_suspension: WaitingSuspendedTask::None, abort_and_suspend_map: HashMap::with_capacity(ABORT_AND_SUSPEND_MAP_INITIAL_SIZE), @@ -158,25 +131,21 @@ impl Runner { ) } + #[instrument(skip(self))] pub(super) fn total_tasks(&self) -> usize { let priority_tasks_count = self.priority_tasks.len(); let current_task_count = usize::from(self.current_task_handle.is_some()); let suspended_task_count = usize::from(self.suspended_task.is_some()); let tasks_count = self.tasks.len(); - trace!( - "Task count: \ - ", - self.worker_id + trace!(%priority_tasks_count, %current_task_count, %suspended_task_count, %tasks_count, + "Tasks count" ); priority_tasks_count + current_task_count + suspended_task_count + tasks_count } + #[instrument(skip(self, task_work_state))] pub(super) fn spawn_task_runner( &mut self, task_id: TaskId, @@ -194,106 +163,77 @@ impl Runner { ); let handle = spawn(run_single_task( - self.worker_id, task_work_state, self.task_output_tx.clone(), suspend_rx, abort_rx, )); - trace!( - "Task runner spawned: ", - self.worker_id - ); + trace!("Task runner spawned"); handle } - pub(super) async fn new_task(&mut 
self, task_work_state: TaskWorkState) { - let task_id = task_work_state.task.id(); - let new_kind = PendingTaskKind::with_priority(task_work_state.task.with_priority()); - - trace!( - "Received new task: ", - self.worker_id - ); + #[instrument(skip(self, task_work_state))] + pub(super) fn new_task( + &mut self, + task_id: TaskId, + task_kind: PendingTaskKind, + task_work_state: TaskWorkState, + ) { + trace!("Received new task"); - self.task_kinds.insert(task_id, new_kind); + self.task_kinds.insert(task_id, task_kind); - match self - .inner_add_task(task_id, new_kind, task_work_state) - .await - { - TaskAddStatus::Running => trace!( - "Task running: ", - self.worker_id - ), - TaskAddStatus::Enqueued => trace!( - "Task enqueued: ", - self.worker_id - ), + match self.inner_add_task(task_id, task_kind, task_work_state) { + TaskAddStatus::Running => trace!("New task is running"), + TaskAddStatus::Enqueued => { + trace!( + total_tasks = self.total_tasks(), + "Task enqueued with other tasks" + ); + } } } - pub(super) async fn resume_task(&mut self, task_id: TaskId) -> Result<(), SystemError> { - trace!( - "Resume task request: ", - self.worker_id - ); + #[instrument(skip(self))] + pub(super) fn resume_task(&mut self, task_id: TaskId) -> Result<(), SystemError> { + trace!("Resume task request"); if let Some(task_work_state) = self.paused_tasks.remove(&task_id) { task_work_state.worktable.set_unpause(); - match self - .inner_add_task( - task_id, - *self - .task_kinds - .get(&task_id) - .expect("we added the task kind before pausing it"), - task_work_state, - ) - .await - { - TaskAddStatus::Running => trace!( - "Resumed task is running: ", - self.worker_id - ), - TaskAddStatus::Enqueued => trace!( - "Resumed task was enqueued: ", - self.worker_id - ), + match self.inner_add_task( + task_id, + *self + .task_kinds + .get(&task_id) + .expect("we added the task kind before pausing it"), + task_work_state, + ) { + TaskAddStatus::Running => trace!("Resumed task is running"), + TaskAddStatus::Enqueued => trace!("Resumed task was enqueued"), } - Ok(()) - } else { - trace!( - "Task not found: ", - self.worker_id - ); - Err(SystemError::TaskNotFound(task_id)) + return Ok(()); } + + trace!("Task not found"); + Err(SystemError::TaskNotFound(task_id)) } + #[instrument(skip(self))] pub(super) fn pause_not_running_task(&mut self, task_id: TaskId) -> Result<(), SystemError> { - trace!( - "Pause not running task request: ", - self.worker_id - ); + trace!("Pause not running task request"); if self.paused_tasks.contains_key(&task_id) { - trace!( - "Task is already paused: ", - self.worker_id - ); + trace!("Task is already paused"); return Ok(()); } if let Some(current_task) = &self.current_task_handle { - if current_task.task_id == task_id { + if current_task.id == task_id { trace!( - "Task began to run before we managed to pause it, run function will pause it: \ - ", - self.worker_id + "Task began to run before we managed to pause it, run function will pause it" ); return Ok(()); // The task will pause itself } @@ -306,13 +246,11 @@ impl Runner { Err(SystemError::TaskNotFound(task_id)) } + #[instrument(skip(self))] fn pause_suspended_task(&mut self, task_id: TaskId) -> bool { if let Some(suspended_task) = &self.suspended_task { - if suspended_task.task.id() == task_id { - trace!( - "Task is already suspended but will be paused: ", - self.worker_id - ); + if suspended_task.id() == task_id { + trace!("Task is already suspended but will be paused"); self.paused_tasks.insert( task_id, @@ -326,11 +264,12 @@ impl Runner { 
false } + #[instrument(skip(self))] fn pause_task_from_queues(&mut self, task_id: TaskId) -> bool { if let Some(index) = self .priority_tasks .iter() - .position(|task_work_state| task_work_state.task.id() == task_id) + .position(|task_work_state| task_work_state.id() == task_id) { self.paused_tasks.insert( task_id, @@ -345,7 +284,7 @@ impl Runner { if let Some(index) = self .tasks .iter() - .position(|task_work_state| task_work_state.task.id() == task_id) + .position(|task_work_state| task_work_state.id() == task_id) { self.paused_tasks.insert( task_id, @@ -358,18 +297,14 @@ impl Runner { false } + #[instrument(skip(self))] pub(super) fn cancel_not_running_task(&mut self, task_id: &TaskId) { - trace!( - "Cancel not running task request: ", - self.worker_id - ); + trace!("Cancel not running task request"); if let Some(current_task) = &self.current_task_handle { - if current_task.task_id == *task_id { + if current_task.id == *task_id { trace!( - "Task began to run before we managed to cancel it, run function will cancel it: \ - ", - self.worker_id + "Task began to run before we managed to cancel it, run function will cancel it" ); return; // The task will cancel itself } @@ -380,16 +315,10 @@ impl Runner { self.task_kinds.remove(task_id); if let Some(suspended_task) = &self.suspended_task { - if suspended_task.task.id() == *task_id { - trace!( - "Task is already suspended but will be canceled: ", - self.worker_id - ); + if suspended_task.id() == *task_id { + trace!("Task is already suspended but will be canceled"); - send_cancel_task_response( - self.worker_id, - self.suspended_task.take().expect("we just checked it"), - ); + send_cancel_task_response(self.suspended_task.take().expect("we just checked it")); return; } @@ -400,15 +329,15 @@ impl Runner { // If the task is not found, then it's possible that the user already canceled it but still have the handle } + #[instrument(skip(self))] #[inline] fn cancel_task_from_queues(&mut self, task_id: &TaskId) { if let Some(index) = self .priority_tasks .iter() - .position(|task_work_state| task_work_state.task.id() == *task_id) + .position(|task_work_state| task_work_state.id() == *task_id) { send_cancel_task_response( - self.worker_id, self.priority_tasks .remove(index) .expect("we just checked it"), @@ -420,15 +349,13 @@ impl Runner { if let Some(index) = self .tasks .iter() - .position(|task_work_state| task_work_state.task.id() == *task_id) + .position(|task_work_state| task_work_state.id() == *task_id) { - send_cancel_task_response( - self.worker_id, - self.tasks.remove(index).expect("we just checked it"), - ); + send_cancel_task_response(self.tasks.remove(index).expect("we just checked it")); } } + #[instrument(skip(self, task_work_state))] #[inline] fn add_task_when_idle( &mut self, @@ -436,15 +363,12 @@ impl Runner { task_kind: PendingTaskKind, task_work_state: TaskWorkState, ) { - trace!( - "Idle worker will process the new task: ", - self.worker_id - ); + trace!("Idle worker will process the new task"); let handle = self.spawn_task_runner(task_id, task_work_state); self.current_task_handle = Some(RunningTask { - task_id, - task_kind, + id: task_id, + kind: task_kind, handle, }); @@ -453,122 +377,89 @@ impl Runner { self.is_idle = false; } + #[instrument(skip(self, task_work_state))] #[inline] - pub(super) async fn inner_add_task( + fn add_task_when_busy( &mut self, - task_id: TaskId, - task_kind: PendingTaskKind, + new_kind: PendingTaskKind, task_work_state: TaskWorkState, + old_task_id: TaskId, + old_kind: PendingTaskKind, ) -> 
TaskAddStatus { - if self.is_idle { - self.add_task_when_idle(task_id, task_kind, task_work_state); - TaskAddStatus::Running - } else { - let RunningTask { - task_id: old_task_id, - task_kind: old_kind, - .. - } = self - .current_task_handle - .as_ref() - .expect("Worker isn't idle, but no task is running"); - - trace!( - "Worker is busy: \ - ", - self.worker_id, - ); - - let add_status = match (task_kind, old_kind) { - (PendingTaskKind::Priority, PendingTaskKind::Priority) => { + match (new_kind, old_kind) { + (PendingTaskKind::Priority, PendingTaskKind::Priority) => { + trace!("Old and new tasks have priority, will put new task on priority queue"); + self.priority_tasks.push_front(task_work_state); + TaskAddStatus::Enqueued + } + (PendingTaskKind::Priority, PendingTaskKind::Normal) => { + if self.waiting_suspension.is_waiting() { trace!( - "Old and new tasks have priority, will put new task on priority queue: \ - ", - self.worker_id + "Worker is already waiting for a task to be suspended, will enqueue new task" ); self.priority_tasks.push_front(task_work_state); + } else { + trace!("Old task will be suspended"); + // We put the query at the top of the priority queue, so it will be + // dispatched by the run function as soon as the current task is suspended + self.priority_tasks.push_front(task_work_state); - TaskAddStatus::Enqueued - } - (PendingTaskKind::Priority, PendingTaskKind::Normal) => { - if self.waiting_suspension.is_waiting() { - trace!( - "Worker is already waiting for a task to be suspended, will enqueue new task: \ - ", - self.worker_id - ); - - self.priority_tasks.push_front(task_work_state); - } else { - trace!( - "Old task will be suspended: \ - ", - self.worker_id - ); - - // We put the query at the top of the priority queue, so it will be - // dispatched by the run function as soon as the current task is suspended - self.priority_tasks.push_front(task_work_state); - - if self - .abort_and_suspend_map - .remove(old_task_id) - .expect("we always store the abort and suspend signalers") - .suspend_tx - .send(()) - .is_err() - { - warn!( - "Task suspend channel closed before receiving suspend signal. \ + if self + .abort_and_suspend_map + .remove(&old_task_id) + .expect("we always store the abort and suspend signalers") + .suspend_tx + .send(()) + .is_err() + { + warn!(%old_task_id, + "Suspend channel closed before receiving suspend signal. \ This probably happened because the task finished before we could suspend it." 
); - } - - self.waiting_suspension = WaitingSuspendedTask::Task(*old_task_id); } - TaskAddStatus::Running - } - (_, _) => { - trace!( - "New task doesn't have priority and will be enqueued: \ - ", - self.worker_id, - ); - - self.tasks.push_back(task_work_state); - - TaskAddStatus::Enqueued + self.waiting_suspension = WaitingSuspendedTask::Task(old_task_id); } - }; - let task_count = self.total_tasks(); - - trace!( - "Worker with {task_count} pending tasks: ", - self.worker_id - ); + TaskAddStatus::Running + } + (_, _) => { + trace!("New task doesn't have priority and will be enqueued"); + self.tasks.push_back(task_work_state); - if task_count > self.work_stealer.workers_count() - && self.last_requested_help.elapsed() > ONE_SECOND - { - trace!( - "Worker requesting help from the system: \ - ", - self.worker_id - ); + TaskAddStatus::Enqueued + } + } + } - self.system_comm - .request_help(self.worker_id, task_count) - .await; + #[instrument(skip(self, task_work_state))] + #[inline] + pub(super) fn inner_add_task( + &mut self, + task_id: TaskId, + task_kind: PendingTaskKind, + task_work_state: TaskWorkState, + ) -> TaskAddStatus { + if self.is_idle { + self.add_task_when_idle(task_id, task_kind, task_work_state); + TaskAddStatus::Running + } else { + trace!("Worker is busy"); - self.last_requested_help = Instant::now(); - } + let RunningTask { + id: old_task_id, + kind: old_kind, + .. + } = self + .current_task_handle + .as_ref() + .expect("Worker isn't idle, but no task is running"); - add_status + self.add_task_when_busy(task_kind, task_work_state, *old_task_id, *old_kind) } } + #[instrument(skip(self))] pub(super) async fn force_task_abortion( &mut self, task_id: &TaskId, @@ -580,9 +471,8 @@ impl Runner { if abort_tx.send(tx).is_err() { debug!( - "Failed to send force abortion request, the task probably finished before we could abort it: \ - ", - self.worker_id + "Failed to send force abortion request, \ + the task probably finished before we could abort it" ); Ok(()) @@ -596,18 +486,14 @@ impl Runner { } } } else { - trace!( - "Forced abortion of a not running task request: ", - self.worker_id - ); + trace!("Forced abortion of a not running task request"); if let Some(current_task) = &self.current_task_handle { - if current_task.task_id == *task_id { + if current_task.id == *task_id { trace!( - "Task began to run before we managed to abort it, run function will abort it: \ - ", - self.worker_id - ); + "Task began to run before we managed to abort it, \ + run function will abort it" + ); return Ok(()); // The task will abort itself } } @@ -615,14 +501,10 @@ impl Runner { self.task_kinds.remove(task_id); if let Some(suspended_task) = &self.suspended_task { - if suspended_task.task.id() == *task_id { - trace!( - "Task is already suspended but will be force aborted: ", - self.worker_id - ); + if suspended_task.id() == *task_id { + trace!("Task is already suspended but will be force aborted"); send_forced_abortion_task_response( - self.worker_id, self.suspended_task.take().expect("we just checked it"), ); @@ -633,10 +515,9 @@ impl Runner { if let Some(index) = self .priority_tasks .iter() - .position(|task_work_state| task_work_state.task.id() == *task_id) + .position(|task_work_state| task_work_state.id() == *task_id) { send_forced_abortion_task_response( - self.worker_id, self.priority_tasks .remove(index) .expect("we just checked it"), @@ -648,31 +529,26 @@ impl Runner { if let Some(index) = self .tasks .iter() - .position(|task_work_state| task_work_state.task.id() == *task_id) + 
.position(|task_work_state| task_work_state.id() == *task_id) { send_forced_abortion_task_response( - self.worker_id, self.tasks.remove(index).expect("we just checked it"), ); return Ok(()); } - // If the task is not found, then it's possible that the user already aborted it but still have the handle + // If the task is not found, then it's possible that + // the user already aborted it but still have the handle Ok(()) } } + #[instrument(skip(self, tx))] pub(super) async fn shutdown(mut self, tx: oneshot::Sender<()>) { - trace!( - "Worker beginning shutdown process: ", - self.worker_id - ); + trace!("Worker beginning shutdown process"); - trace!( - "Aborting steal task for shutdown if there is one running: ", - self.worker_id - ); + trace!("Aborting steal task for shutdown if there is one running"); self.abort_steal_task(); @@ -693,7 +569,7 @@ impl Runner { } = self; if is_idle { - trace!("Worker is idle, no tasks to shutdown: "); + trace!("Worker is idle, no tasks to shutdown"); assert!( current_task_handle.is_none(), "can't shutdown with a running task if we're idle" @@ -711,33 +587,31 @@ impl Runner { "can't shutdown with a suspended task if we're idle" ); - for paused_task in paused_tasks.into_values() { - send_shutdown_task_response(worker_id, &paused_task.task.id(), paused_task); - } + paused_tasks + .into_values() + .for_each(send_shutdown_task_response); } else { - trace!("Worker is busy, will shutdown tasks: "); + trace!("Worker is busy, will shutdown tasks"); if let Some(RunningTask { - task_id, handle, .. + id: task_id, + handle, + .. }) = current_task_handle.take() { for (task_id, AbortAndSuspendSignalers { suspend_tx, .. }) in abort_and_suspend_map { if suspend_tx.send(()).is_err() { - warn!( - "Shutdown request channel closed before sending abort signal: \ - " + warn!(%task_id, + "Shutdown request channel closed before sending abort signal" ); } else { - trace!( - "Sent suspend signal for task on shutdown: \ - " - ); + trace!(%task_id, "Sent suspend signal for task on shutdown"); } } if let Err(e) = handle.await { - error!("Task failed to join: {e:#?}"); + error!(%task_id, ?e, "Task failed to join"); } stolen_task_tx.close(); @@ -756,16 +630,10 @@ impl Runner { .chain(suspended_task.into_iter()) .chain(paused_tasks.into_values()) .chain(tasks.into_iter()) - .for_each(|task_work_state| { - send_shutdown_task_response( - worker_id, - &task_work_state.task.id(), - task_work_state, - ); - }); + .for_each(send_shutdown_task_response); } - trace!("Worker shutdown process completed: "); + trace!("Worker shutdown process completed"); if tx.send(()).is_err() { warn!("Shutdown request channel closed before sending ack"); @@ -796,42 +664,35 @@ impl Runner { status, }) => match status { InternalTaskExecStatus::Done(out) => { - send_complete_task_response(worker_id, &task_id, task_work_state, out); + send_complete_task_response(task_work_state, out); } InternalTaskExecStatus::Canceled => { - send_cancel_task_response(worker_id, task_work_state); + send_cancel_task_response(task_work_state); } InternalTaskExecStatus::Suspend | InternalTaskExecStatus::Paused => { - send_shutdown_task_response(worker_id, &task_id, task_work_state); + send_shutdown_task_response(task_work_state); } InternalTaskExecStatus::Error(e) => { - send_error_task_response(worker_id, task_work_state, e); + send_error_task_response(task_work_state, e); } }, Err(()) => { - error!( - "Task failed to suspend on shutdown" - ); + error!(%task_id, "Task failed to suspend on shutdown"); } }, 
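				// A steal that completes while we are shutting down is not executed: the
				// worker claims it (`change_worker`) and immediately hands it back through
				// its done channel as a shutdown result, so its owner can serialize it and
				// resume it later.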
StreamMessage::Steal(Some(StoleTaskMessage(task_work_state))) => { trace!( - "Worker stole task: \ - ", - worker_id, - task_work_state.task.id() + stolen_worker_id = %task_work_state.worker_id(), + task_id = %task_work_state.id(), + "Stole task", ); task_work_state.change_worker(worker_id); - send_shutdown_task_response( - worker_id, - &task_work_state.task.id(), - task_work_state, - ); + send_shutdown_task_response(task_work_state); } StreamMessage::Steal(None) => {} @@ -855,109 +716,56 @@ impl Runner { .map(|task| (PendingTaskKind::Normal, task)) } + #[instrument(skip(self))] pub(super) async fn steal_request( &mut self, stolen_task_tx: chan::Sender>>, ) -> bool { - trace!("Steal request: ", self.worker_id); + trace!("Steal request"); if let Some((kind, task_work_state)) = self.get_next_task() { - self.proceed_with_task_to_be_stolen(kind, task_work_state, stolen_task_tx) - .await - } else { - trace!("No task to steal: ", self.worker_id); - false - } - } - - async fn proceed_with_task_to_be_stolen( - &mut self, - kind: PendingTaskKind, - task_work_state: TaskWorkState, - stolen_task_tx: chan::Sender>>, - ) -> bool { - let task_id = task_work_state.task.id(); - self.task_kinds.remove(&task_id); - - trace!( - "Stealing task: ", - self.worker_id - ); + let task_id = task_work_state.id(); + self.task_kinds.remove(&task_id); - if let Err(chan::SendError(Some(StoleTaskMessage(task_work_state)))) = stolen_task_tx - .send(Some(StoleTaskMessage(task_work_state))) - .await - { - self.put_back_failed_to_stole_task(task_id, kind, task_work_state); - false - } else { - true - } - } - - fn put_back_failed_to_stole_task( - &mut self, - id: TaskId, - kind: PendingTaskKind, - task_work_state: TaskWorkState, - ) { - warn!( - "Steal request channel closed before sending task: ", - self.worker_id - ); - match kind { - PendingTaskKind::Normal => self.tasks.push_front(task_work_state), - PendingTaskKind::Priority => self.priority_tasks.push_front(task_work_state), - PendingTaskKind::Suspended => { - assert!( - self.suspended_task.is_none(), - "tried to suspend a task when we already have a suspended task on PendingTaskKind::Suspended" - ); - self.suspended_task = Some(task_work_state); - } - } + trace!(%task_id, ?kind, "Task being stolen"); - self.task_kinds.insert(id, kind); - } + if let Err(chan::SendError(Some(StoleTaskMessage(task_work_state)))) = stolen_task_tx + .send(Some(StoleTaskMessage(task_work_state))) + .await + { + warn!("Steal request channel closed before sending task"); + match kind { + PendingTaskKind::Normal => self.tasks.push_front(task_work_state), + PendingTaskKind::Priority => self.priority_tasks.push_front(task_work_state), + PendingTaskKind::Suspended => { + assert!( + self.suspended_task.is_none(), + "tried to suspend a task when we already have a suspended task" + ); + self.suspended_task = Some(task_work_state); + } + } - pub(super) fn wake_up(&mut self) { - if self.is_idle { - trace!( - "Worker is idle, waking up: ", - self.worker_id - ); + self.task_kinds.insert(task_id, kind); - if self.current_steal_task_handle.is_none() { - self.current_steal_task_handle = Some(dispatch_steal_request( - self.worker_id, - self.work_stealer.clone(), - self.stole_task_tx.clone(), - )); + false } else { - trace!( - "Steal task already running, ignoring wake up request: ", - self.worker_id - ); + true } } else { - trace!( - "Worker already working, ignoring wake up request: ", - self.worker_id - ); + trace!("No task to steal"); + false } } + #[instrument(skip(self))] #[inline] pub(super) async fn 
dispatch_next_task(&mut self, finished_task_id: &TaskId) { - trace!( - "Task finished and will try to process a new task: \ - ", - self.worker_id - ); + trace!("Task finished and will try to process a new task"); self.abort_and_suspend_map.remove(finished_task_id); let RunningTask { - task_id: old_task_id, + id: old_task_id, handle, .. @@ -966,48 +774,33 @@ impl Runner { .take() .expect("Task handle missing, but task output received"); - assert_eq!(*finished_task_id, old_task_id, "Task output id mismatch"); + assert_eq!(*finished_task_id, old_task_id, "Task output id mismatch"); // Sanity check - trace!( - "Waiting task handle: ", - self.worker_id - ); + trace!("Waiting task handle",); if let Err(e) = handle.await { - error!("Task failed to join: {e:#?}"); + error!(?e, "Task failed to join"); } - trace!( - "Waited task handle: ", - self.worker_id - ); + trace!("Waited task handle"); - if let Some((task_kind, task_work_state)) = self.get_next_task() { - let task_id = task_work_state.task.id(); + if let Some((next_task_kind, task_work_state)) = self.get_next_task() { + let next_task_id = task_work_state.id(); - trace!( - "Dispatching next task: ", - self.worker_id - ); + trace!(%next_task_id, ?next_task_kind, "Dispatching next task"); - let handle = self.spawn_task_runner(task_id, task_work_state); + let handle = self.spawn_task_runner(next_task_id, task_work_state); self.current_task_handle = Some(RunningTask { - task_id, - task_kind, + id: next_task_id, + kind: next_task_kind, handle, }); } else { - trace!( - "No task to dispatch, worker is now idle and will dispatch a steal request: ", - self.worker_id - ); + trace!("No task to dispatch, worker is now idle and will dispatch a steal request"); self.is_idle = true; - self.system_comm.idle_report(self.worker_id).await; + self.system_comm.idle_report(self.worker_id); - trace!( - "Worker reported idle status: ", - self.worker_id - ); + trace!("Worker reported idle status"); if self.current_steal_task_handle.is_none() { self.current_steal_task_handle = Some(dispatch_steal_request( @@ -1016,14 +809,12 @@ impl Runner { self.stole_task_tx.clone(), )); } else { - trace!( - "Steal task already running: ", - self.worker_id - ); + trace!("Steal task already running"); } } } + #[instrument(skip(self, task_work_state, status))] pub(super) async fn process_task_output( &mut self, task_id: &TaskId, @@ -1035,80 +826,63 @@ impl Runner { match status { InternalTaskExecStatus::Done(out) => { self.task_kinds.remove(task_id); - send_complete_task_response(self.worker_id, task_id, task_work_state, out); + send_complete_task_response(task_work_state, out); } InternalTaskExecStatus::Paused => { self.paused_tasks.insert(*task_id, task_work_state); - trace!( - "Task paused: ", - self.worker_id - ); + trace!("Task paused"); } InternalTaskExecStatus::Canceled => { self.task_kinds.remove(task_id); - send_cancel_task_response(self.worker_id, task_work_state); + send_cancel_task_response(task_work_state); } InternalTaskExecStatus::Error(e) => { self.task_kinds.remove(task_id); - send_error_task_response(self.worker_id, task_work_state, e); + send_error_task_response(task_work_state, e); } InternalTaskExecStatus::Suspend => { assert!( self.suspended_task.is_none(), - "tried to suspend a task when we already have a suspended task on InternalTaskExecStatus::Suspend" + "tried to suspend a task when we already have a suspended task" ); self.suspended_task = Some(task_work_state); - trace!( - "Task suspended: ", - self.worker_id - ); + trace!("Task suspended"); 
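					// The task is now parked in the single `suspended_task` slot; clearing
					// the `waiting_suspension` marker below lets a later priority task
					// suspend the then-running task again instead of only being queued.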
self.clean_suspended_task(task_id); } } - trace!( - "Processing task output completed and will try to dispatch a new task: \ - ", - self.worker_id - ); + trace!("Processing task output completed and will try to dispatch a new task"); self.dispatch_next_task(task_id).await; } + #[instrument(skip(self))] pub(super) fn idle_check(&mut self) { if self.is_idle { - trace!( - "Worker is idle for some time and will try to steal a task: ", - self.worker_id - ); + trace!("Worker is idle for some time and will try to steal a task"); if self.current_steal_task_handle.is_none() { self.steal_attempt(); } else { - trace!( - "Steal task already running, ignoring on this idle check: ", - self.worker_id - ); + trace!("Steal task already running, ignoring on this idle check"); } self.idle_memory_cleanup(); } } + #[instrument(skip(self), fields(steal_attempts_count = self.steal_attempts_count))] fn steal_attempt(&mut self) { let elapsed = self.last_steal_attempt_at.elapsed(); let required = (TEN_SECONDS * self.steal_attempts_count).min(ONE_MINUTE); - trace!( - "Steal attempt required cool down: \ - ", - self.worker_id, - self.steal_attempts_count - ); + + trace!(?elapsed, ?required, "Steal attempt required cool down"); + if elapsed > required { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, @@ -1117,11 +891,7 @@ impl Runner { )); self.last_steal_attempt_at = Instant::now(); } else { - trace!( - "Steal attempt still cooling down: ", - self.worker_id, - self.steal_attempts_count - ); + trace!("Steal attempt still cooling down"); } } @@ -1133,7 +903,11 @@ impl Runner { } if self.task_kinds.capacity() > TASK_QUEUE_INITIAL_SIZE { - assert_eq!(self.task_kinds.len(), self.paused_tasks.len(), "If we're idle, the number of task_kinds MUST be equal to the number of paused tasks"); + assert_eq!( + self.task_kinds.len(), + self.paused_tasks.len(), + "If we're idle, the number of task_kinds MUST be equal to the number of paused tasks" + ); self.task_kinds.shrink_to(TASK_QUEUE_INITIAL_SIZE); } @@ -1154,71 +928,79 @@ impl Runner { } } + #[instrument(skip(self))] pub(super) fn abort_steal_task(&mut self) { if let Some(steal_task_handle) = self.current_steal_task_handle.take() { steal_task_handle.abort(); - trace!("Aborted steal task: ", self.worker_id); + trace!("Aborted steal task"); } else { - trace!("No steal task to abort: ", self.worker_id); + trace!("No steal task to abort"); } } + #[instrument( + skip(self, maybe_new_task), + fields( + maybe_new_task = ?maybe_new_task.as_ref() + .map(|StoleTaskMessage(task_work_state)| task_work_state.id()) + ) + )] pub(super) async fn process_stolen_task( &mut self, maybe_new_task: Option>, ) { if let Some(steal_task_handle) = self.current_steal_task_handle.take() { if let Err(e) = steal_task_handle.await { - error!("Steal task failed to join: {e:#?}"); + error!(?e, "Steal task failed to join"); } } if let Some(StoleTaskMessage(task_work_state)) = maybe_new_task { - self.system_comm.working_report(self.worker_id).await; - trace!( - "Stolen task: ", - self.worker_id, - task_work_state.task.id() - ); + self.system_comm.working_report(self.worker_id); + + let stolen_task_id = task_work_state.id(); + + trace!(%stolen_task_id, "Stolen task"); + self.steal_attempts_count = 0; - self.new_task(task_work_state).await; + self.new_task(stolen_task_id, task_work_state.kind(), task_work_state); } else { self.steal_attempts_count += 1; } } + #[instrument(skip(self))] pub(crate) fn clean_suspended_task(&mut self, task_id: &TaskId) { match self.waiting_suspension { 
WaitingSuspendedTask::Task(waiting_task_id) if waiting_task_id == *task_id => { - trace!( - "Task was suspended and will be cleaned: ", - self.worker_id - ); + trace!("Task was suspended and will be cleaned"); self.waiting_suspension = WaitingSuspendedTask::None; } WaitingSuspendedTask::Task(_) => { - trace!( - "Task wasn't suspended, ignoring: ", - self.worker_id - ); + trace!("Task wasn't suspended, ignoring"); + } + WaitingSuspendedTask::None => { + // Everything is Awesome! } - WaitingSuspendedTask::None => {} } } + #[instrument(skip(self))] pub(crate) async fn clear_errored_task(&mut self, task_id: TaskId) { self.task_kinds.remove(&task_id); self.clean_suspended_task(&task_id); + trace!("Cleansed errored task"); + self.dispatch_next_task(&task_id).await; } } type RunTaskOutput = (Box>, Result, SystemError>); +#[instrument(skip(task, worktable, interrupter))] fn handle_run_task_attempt( - worker_id: WorkerId, task_id: TaskId, mut task: Box>, worktable: &TaskWorktable, @@ -1230,21 +1012,15 @@ fn handle_run_task_attempt( let already_aborted = worktable.has_aborted(); let early_result = if already_paused { - trace!( - "Task was paused before running: " - ); + trace!("Task was paused before running"); Some(Ok(Ok(ExecStatus::Paused))) } else if already_canceled { - trace!( - "Task was canceled before running: " - ); + trace!("Task was canceled before running"); Some(Ok(Ok(ExecStatus::Canceled))) } else if already_aborted { - trace!( - "Task was aborted before running: " - ); + trace!("Task was aborted before running"); Some(Err(SystemError::TaskAborted(task_id))) } else { @@ -1271,7 +1047,7 @@ fn handle_run_task_attempt( match run_result { Ok(res) => { - trace!("Ran task: : {res:?}"); + trace!(?res, "Ran task"); (task, Ok(res)) } @@ -1283,8 +1059,6 @@ fn handle_run_task_attempt( } fn handle_task_suspension( - worker_id: WorkerId, - task_id: TaskId, has_suspended: Arc, worktable: Arc, suspend_rx: oneshot::Receiver<()>, @@ -1293,21 +1067,18 @@ fn handle_task_suspension( if suspend_rx.await.is_ok() { let (tx, rx) = oneshot::channel(); - trace!("Suspend signal received: "); + trace!("Suspend signal received"); worktable.suspend(tx, has_suspended).await; if rx.await.is_ok() { - trace!("Suspending: "); + trace!("Suspending"); } else { // The task probably finished before we could suspend it so the channel was dropped - trace!("Suspend channel closed: "); + trace!("Suspend channel closed"); } } else { - trace!( - "Suspend channel closed, task probably finished before we could suspend it: \ - " - ); + trace!("Suspend channel closed, task probably finished before we could suspend it"); } }) } @@ -1320,7 +1091,6 @@ type PartialTaskWorkState = ( ); async fn emit_task_completed_message( - worker_id: WorkerId, run_task_output: RunTaskOutput, has_suspended: Arc, (task_id, worktable, done_tx, interrupter): PartialTaskWorkState, @@ -1328,9 +1098,8 @@ async fn emit_task_completed_message( ) { match run_task_output { (task, Ok(res)) => { - trace!( - "Task completed ok: " - ); + trace!(?res, "Task completed ok"); + task_output_tx .send(TaskOutputMessage(task_id, { let mut internal_status = res.into(); @@ -1339,7 +1108,7 @@ async fn emit_task_completed_message( if matches!(internal_status, InternalTaskExecStatus::Paused) && suspended { internal_status = InternalTaskExecStatus::Suspend; } else if matches!(internal_status, InternalTaskExecStatus::Paused) { - debug!("Task completed with status: {internal_status:#?}"); + debug!(?internal_status, "Task completed with status"); } Ok(TaskRunnerOutput { @@ -1357,7 
+1126,7 @@ async fn emit_task_completed_message( } (_, Err(e)) => { - error!("Task had an error: : {e:#?}"); + error!(?e, "Task had an error"); if done_tx .send(if matches!(e, SystemError::TaskAborted(_)) { @@ -1380,8 +1149,8 @@ async fn emit_task_completed_message( } } +#[instrument(skip_all, fields(task_id = %task.id()))] async fn run_single_task( - worker_id: WorkerId, TaskWorkState { task, worktable, @@ -1399,23 +1168,15 @@ async fn run_single_task( let task_id = task.id(); - trace!("Running task: "); + trace!("Running task"); - let handle = handle_run_task_attempt( - worker_id, - task_id, - task, - &worktable, - Arc::clone(&interrupter), - ); + let handle = handle_run_task_attempt(task_id, task, &worktable, Arc::clone(&interrupter)); let task_abort_handle = handle.abort_handle(); let has_suspended = Arc::new(AtomicBool::new(false)); let suspender_handle = handle_task_suspension( - worker_id, - task_id, Arc::clone(&has_suspended), Arc::clone(&worktable), suspend_rx, @@ -1423,14 +1184,12 @@ async fn run_single_task( match (async { RaceOutput::Completed(handle.await) }, async move { if let Ok(tx) = abort_rx.await { - trace!("Aborting task: "); + trace!("Aborting task"); RaceOutput::Abort(tx) } else { // If the abort channel is closed, we should just ignore it and keep waiting for the task to finish // as we're being suspended by the worker - trace!( - "Abort channel closed, will wait for task to finish: " - ); + trace!("Abort channel closed, will wait for task to finish"); pending().await } }) @@ -1439,7 +1198,6 @@ async fn run_single_task( { RaceOutput::Completed(Ok(run_task_output)) => { emit_task_completed_message( - worker_id, run_task_output, has_suspended, (task_id, worktable, done_tx, interrupter), @@ -1449,7 +1207,7 @@ async fn run_single_task( } RaceOutput::Completed(Err(join_error)) => { - error!("Task failed to join: {join_error:#?}",); + error!(?join_error, "Task failed to join"); if done_tx.send(Err(SystemError::TaskJoin(task_id))).is_err() { error!("Task done channel closed while sending join error response"); } @@ -1468,7 +1226,7 @@ async fn run_single_task( RaceOutput::Abort(tx) => { task_abort_handle.abort(); - trace!("Task aborted: "); + trace!("Task aborted"); if done_tx.send(Ok(TaskStatus::ForcedAbortion)).is_err() { error!("Task done channel closed while sending abort error response"); @@ -1491,40 +1249,37 @@ async fn run_single_task( } if !suspender_handle.is_finished() { - trace!( - "Aborting suspender handler as it isn't needed anymore: " - ); + trace!("Aborting suspender handler as it isn't needed anymore"); // if we received a suspend signal this abort will do nothing, as the task finished already suspender_handle.abort(); } - trace!("Run single task finished: "); + trace!("Run single task finished"); } +#[instrument(skip(task, done_tx, worktable, out), fields(task_id = %task.id()))] fn send_complete_task_response( - worker_id: WorkerId, - task_id: &TaskId, TaskWorkState { - done_tx, worktable, .. + done_tx, + worktable, + task, + .. 
}: TaskWorkState, out: TaskOutput, ) { worktable.set_completed(); - if done_tx.send(Ok(TaskStatus::Done((*task_id, out)))).is_err() { - warn!( - "Task done channel closed before sending done response for task: \ - " - ); + if done_tx + .send(Ok(TaskStatus::Done((task.id(), out)))) + .is_err() + { + warn!("Task done channel closed before sending done response for task"); } else { - trace!( - "Emitted task done signal on shutdown: \ - " - ); + trace!("Emitted task done signal on shutdown"); } } +#[instrument(skip(task, done_tx, worktable), fields(task_id = %task.id()))] fn send_cancel_task_response( - worker_id: WorkerId, TaskWorkState { task, done_tx, @@ -1534,23 +1289,14 @@ fn send_cancel_task_response( ) { worktable.set_canceled(); if done_tx.send(Ok(TaskStatus::Canceled)).is_err() { - warn!( - "Task done channel closed before sending canceled response for task: \ - ", - task.id(), - ); + warn!("Task done channel closed before sending canceled response for task"); } else { - trace!( - "Emitted task canceled signal on cancel not running task: \ - ", - task.id(), - ); + trace!("Emitted task canceled signal on cancel not running task"); } } +#[instrument(skip(task, done_tx, worktable), fields(task_id = %task.id()))] fn send_shutdown_task_response( - worker_id: WorkerId, - task_id: &TaskId, TaskWorkState { task, done_tx, @@ -1560,20 +1306,14 @@ fn send_shutdown_task_response( ) { worktable.set_shutdown(); if done_tx.send(Ok(TaskStatus::Shutdown(task))).is_err() { - warn!( - "Task done channel closed before sending shutdown response for task: \ - " - ); + warn!("Task done channel closed before sending shutdown response for task"); } else { - trace!( - "Successfully suspended and sent back DynTask on worker shutdown: \ - " - ); + trace!("Successfully suspended and sent back DynTask on worker shutdown"); } } +#[instrument(skip(task, done_tx, worktable), fields(task_id = %task.id()))] fn send_error_task_response( - worker_id: usize, TaskWorkState { task, done_tx, @@ -1584,22 +1324,14 @@ fn send_error_task_response( ) { worktable.set_completed(); if done_tx.send(Ok(TaskStatus::Error(e))).is_err() { - warn!( - "Task done channel closed before sending error response for task: \ - ", - task.id() - ); + warn!("Task done channel closed before sending error response for task"); } else { - trace!( - "Emitted task error signal on shutdown: \ - ", - task.id() - ); + trace!("Emitted task error signal on shutdown"); } } +#[instrument(skip(task, done_tx, worktable), fields(task_id = %task.id()))] fn send_forced_abortion_task_response( - worker_id: WorkerId, TaskWorkState { task, done_tx, @@ -1609,16 +1341,16 @@ fn send_forced_abortion_task_response( ) { worktable.set_aborted(); if done_tx.send(Ok(TaskStatus::ForcedAbortion)).is_err() { - warn!( - "Task done channel closed before sending forced abortion response for task: \ - ", - task.id() - ); + warn!("Task done channel closed before sending forced abortion response for task"); } else { - trace!( - "Emitted task forced abortion signal on cancel not running task: \ - ", - task.id() - ); + trace!("Emitted task forced abortion signal on cancel not running task"); } } + +fn dispatch_steal_request( + worker_id: WorkerId, + work_stealer: WorkStealer, + stole_task_tx: chan::Sender>>, +) -> JoinHandle<()> { + spawn(async move { work_stealer.steal(worker_id, &stole_task_tx).await }) +} From bb81353e5f87278ced65b66f5c8e49c5e370e409 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Thu, 23 May 2024 03:37:01 -0300 Subject: [PATCH 18/33] Bunch of fixes and 
optimizations --- core/crates/heavy-lifting/src/indexer/job.rs | 342 +++++-- .../heavy-lifting/src/indexer/shallow.rs | 4 + .../src/indexer/tasks/walker/entry.rs | 91 ++ .../src/indexer/tasks/walker/metadata.rs | 64 ++ .../tasks/{walker.rs => walker/mod.rs} | 880 ++++-------------- .../src/indexer/tasks/walker/rules.rs | 319 +++++++ .../src/indexer/tasks/walker/save_state.rs | 219 +++++ .../src/media_processor/tasks/thumbnailer.rs | 38 +- core/crates/indexer-rules/src/lib.rs | 56 +- core/crates/indexer-rules/src/seed.rs | 33 +- crates/task-system/src/task.rs | 12 +- 11 files changed, 1226 insertions(+), 832 deletions(-) create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/walker/metadata.rs rename core/crates/heavy-lifting/src/indexer/tasks/{walker.rs => walker/mod.rs} (67%) create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/walker/rules.rs create mode 100644 core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index 3c0d54321d38..d8b439eb70b0 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -24,7 +24,7 @@ use sd_task_system::{ use sd_utils::{db::maybe_missing, u64_to_frontend}; use std::{ - collections::{HashMap, HashSet}, + collections::{HashMap, HashSet, VecDeque}, hash::{Hash, Hasher}, mem, path::PathBuf, @@ -38,7 +38,7 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::{debug, error, warn}; +use tracing::{debug, error, trace, warn}; use super::{ remove_non_existing_file_paths, reverse_update_directories_sizes, @@ -63,6 +63,11 @@ pub struct Indexer { ancestors_already_indexed: HashSet>, iso_paths_and_sizes: HashMap, u64>, + processing_first_directory: bool, + + to_create_buffer: VecDeque, + to_update_buffer: VecDeque, + errors: Vec, pending_tasks_on_resume: Vec>, @@ -147,49 +152,26 @@ impl Job for Indexer { return res; } + if let Some(res) = self + .dispatch_last_save_and_update_tasks(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + return res; + } + + if let Some(res) = self + .index_pending_ancestors(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + return res; + } + if !self.tasks_for_shutdown.is_empty() { return Ok(ReturnStatus::Shutdown( SerializableJob::::serialize(self).await, )); } - if !self.ancestors_needing_indexing.is_empty() { - let save_tasks = self - .ancestors_needing_indexing - .drain() - .chunks(BATCH_SIZE) - .into_iter() - .map(|chunk| { - let chunked_saves = chunk.collect::>(); - self.metadata.total_paths += chunked_saves.len() as u64; - self.metadata.total_save_steps += 1; - - SaveTask::new_deep( - self.location.id, - self.location.pub_id.clone(), - chunked_saves, - Arc::clone(ctx.db()), - Arc::clone(ctx.sync()), - ) - }) - .collect::>(); - - pending_running_tasks.extend(dispatcher.dispatch_many(save_tasks).await); - - if let Some(res) = self - .process_handles(&mut pending_running_tasks, &ctx, &dispatcher) - .await - { - return res; - } - - if !self.tasks_for_shutdown.is_empty() { - return Ok(ReturnStatus::Shutdown( - SerializableJob::::serialize(self).await, - )); - } - } - // From here onward, job will not be interrupted anymore let Self { @@ -283,6 +265,12 @@ impl Indexer { location, sub_path, metadata: Metadata::default(), + + processing_first_directory: true, + + to_create_buffer: 
VecDeque::new(), + to_update_buffer: VecDeque::new(), + errors: Vec::new(), pending_tasks_on_resume: Vec::new(), @@ -354,6 +342,7 @@ impl Indexer { total_size, mut handles, scan_time, + .. }: WalkTaskOutput, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, @@ -413,41 +402,8 @@ impl Indexer { remove_non_existing_file_paths(to_remove, ctx.db(), ctx.sync()).await?; self.metadata.db_write_time += db_delete_time.elapsed(); - let save_tasks = to_create - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .map(|chunk| { - let chunked_saves = chunk.collect::>(); - self.metadata.total_paths += chunked_saves.len() as u64; - self.metadata.total_save_steps += 1; - - SaveTask::new_deep( - self.location.id, - self.location.pub_id.clone(), - chunked_saves, - Arc::clone(ctx.db()), - Arc::clone(ctx.sync()), - ) - }) - .collect::>(); - - let update_tasks = to_update - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .map(|chunk| { - let chunked_updates = chunk.collect::>(); - self.metadata.total_updated_paths += chunked_updates.len() as u64; - self.metadata.total_update_steps += 1; - - UpdateTask::new_deep( - chunked_updates, - Arc::clone(ctx.db()), - Arc::clone(ctx.sync()), - ) - }) - .collect::>(); + let (save_tasks, update_tasks) = + self.prepare_save_and_update_tasks(to_create, to_update, ctx); debug!( "Dispatching more ({}W/{}S/{}U) tasks, completed ({}/{})", @@ -625,6 +581,207 @@ impl Indexer { Ok(()) } + + async fn dispatch_last_save_and_update_tasks( + &mut self, + pending_running_tasks: &mut FuturesUnordered>, + ctx: &impl JobContext, + dispatcher: &JobTaskDispatcher, + ) -> Option> { + if !self.to_create_buffer.is_empty() || !self.to_update_buffer.is_empty() { + if !self.to_create_buffer.is_empty() { + assert!( + self.to_create_buffer.len() <= BATCH_SIZE, + "last save task must be less than BATCH_SIZE paths" + ); + + self.metadata.total_paths += self.to_create_buffer.len() as u64; + self.metadata.total_save_steps += 1; + + pending_running_tasks.push( + dispatcher + .dispatch(SaveTask::new_deep( + self.location.id, + self.location.pub_id.clone(), + self.to_create_buffer.drain(..).collect(), + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )) + .await, + ); + } + + if !self.to_update_buffer.is_empty() { + assert!( + self.to_update_buffer.len() <= BATCH_SIZE, + "last update task must be less than BATCH_SIZE paths" + ); + + self.metadata.total_updated_paths += self.to_update_buffer.len() as u64; + self.metadata.total_update_steps += 1; + + pending_running_tasks.push( + dispatcher + .dispatch(UpdateTask::new_deep( + self.to_update_buffer.drain(..).collect(), + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )) + .await, + ); + } + + self.process_handles(pending_running_tasks, ctx, dispatcher) + .await + } else { + None + } + } + + async fn index_pending_ancestors( + &mut self, + pending_running_tasks: &mut FuturesUnordered>, + ctx: &impl JobContext, + dispatcher: &JobTaskDispatcher, + ) -> Option> { + if self.ancestors_needing_indexing.is_empty() { + return None; + } + + let save_tasks = self + .ancestors_needing_indexing + .drain() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + let chunked_saves = chunk.collect::>(); + self.metadata.total_paths += chunked_saves.len() as u64; + self.metadata.total_save_steps += 1; + + SaveTask::new_deep( + self.location.id, + self.location.pub_id.clone(), + chunked_saves, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + ) + }) + .collect::>(); + + pending_running_tasks.extend(dispatcher.dispatch_many(save_tasks).await); + + 
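Editor's note: the pending-ancestors pass above drains the set and slices it into BATCH_SIZE-sized SaveTasks with Itertools::chunks. Below is a self-contained sketch of just that batching step, assuming the itertools crate; the String entries and the 2500/1000 sizes are made up for illustration.

use std::collections::HashSet;

use itertools::Itertools;

// Illustrative batch size; the real constant lives in the indexer module.
const BATCH_SIZE: usize = 1000;

fn main() {
    let mut pending: HashSet<String> = (0..2500).map(|i| format!("entry-{i}")).collect();

    // Drain the set and group the entries into fixed-size batches, one per save task.
    let batches: Vec<Vec<String>> = pending
        .drain()
        .chunks(BATCH_SIZE)
        .into_iter()
        .map(|chunk| chunk.collect())
        .collect();

    // 2500 entries become 3 batches (1000, 1000, 500); drain order is arbitrary.
    assert_eq!(batches.len(), 3);
    assert_eq!(batches.iter().map(Vec::len).sum::<usize>(), 2500);
    assert!(pending.is_empty());
}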
self.process_handles(pending_running_tasks, ctx, dispatcher) + .await + } + + fn prepare_save_and_update_tasks( + &mut self, + to_create: Vec, + to_update: Vec, + ctx: &impl JobContext, + ) -> (Vec, Vec) { + if self.processing_first_directory { + // If we are processing the first directory, we dispatch shallow tasks with higher priority + // this way we provide a faster feedback loop to the user + self.processing_first_directory = false; + + let save_tasks = to_create + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + let chunked_saves = chunk.collect::>(); + self.metadata.total_paths += chunked_saves.len() as u64; + self.metadata.total_save_steps += 1; + + SaveTask::new_shallow( + self.location.id, + self.location.pub_id.clone(), + chunked_saves, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + ) + }) + .collect::>(); + + let update_tasks = to_update + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + let chunked_updates = chunk.collect::>(); + self.metadata.total_updated_paths += chunked_updates.len() as u64; + self.metadata.total_update_steps += 1; + + UpdateTask::new_shallow( + chunked_updates, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + ) + }) + .collect::>(); + + (save_tasks, update_tasks) + } else { + self.to_create_buffer.extend(to_create); + + let save_tasks = if self.to_create_buffer.len() > BATCH_SIZE { + let chunks_count = self.to_create_buffer.len() / BATCH_SIZE; + let mut save_tasks = Vec::with_capacity(chunks_count); + + for _ in 0..chunks_count { + let chunked_saves = self + .to_create_buffer + .drain(..BATCH_SIZE) + .collect::>(); + + self.metadata.total_paths += chunked_saves.len() as u64; + self.metadata.total_save_steps += 1; + + save_tasks.push(SaveTask::new_deep( + self.location.id, + self.location.pub_id.clone(), + chunked_saves, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )); + } + save_tasks + } else { + trace!("Not enough entries to dispatch a new saver task"); + vec![] + }; + + self.to_update_buffer.extend(to_update); + + let update_tasks = if self.to_update_buffer.len() > BATCH_SIZE { + let chunks_count = self.to_update_buffer.len() / BATCH_SIZE; + let mut update_tasks = Vec::with_capacity(chunks_count); + + for _ in 0..chunks_count { + let chunked_updates = self + .to_update_buffer + .drain(..BATCH_SIZE) + .collect::>(); + + self.metadata.total_updated_paths += chunked_updates.len() as u64; + self.metadata.total_update_steps += 1; + + update_tasks.push(UpdateTask::new_deep( + chunked_updates, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )); + } + update_tasks + } else { + trace!("Not enough entries to dispatch a new updater task"); + vec![] + }; + + (save_tasks, update_tasks) + } + } } #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -693,11 +850,16 @@ struct SaveState { metadata: Metadata, iso_file_path_factory: IsoFilePathFactory, - indexer_ruler_bytes: Vec, + indexer_ruler: IndexerRuler, walker_root_path: Option>, ancestors_needing_indexing: HashSet, ancestors_already_indexed: HashSet>, - paths_and_sizes: HashMap, u64>, + iso_paths_and_sizes: HashMap, u64>, + + processing_first_directory: bool, + + to_create_buffer: VecDeque, + to_update_buffer: VecDeque, errors: Vec, @@ -715,7 +877,10 @@ impl SerializableJob for Indexer { walker_root_path, ancestors_needing_indexing, ancestors_already_indexed, - iso_paths_and_sizes: paths_and_sizes, + iso_paths_and_sizes, + processing_first_directory, + to_create_buffer, + to_update_buffer, errors, tasks_for_shutdown, .. 
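Editor's note: the save-state hunk below persists the indexer ruler and the new create/update buffers through the same MessagePack path as before. This is a minimal round-trip of the rmp_serde::to_vec_named / from_slice pair that backs it, using a toy struct whose fields are illustrative only.

use serde::{Deserialize, Serialize};

// Toy stand-in for a job's save-state; field names are illustrative, not Spacedrive's.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct SaveState {
    total_paths: u64,
    processing_first_directory: bool,
    to_create_buffer: Vec<String>,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let state = SaveState {
        total_paths: 42,
        processing_first_directory: false,
        to_create_buffer: vec!["a".into(), "b".into()],
    };

    // `to_vec_named` keeps field names in the MessagePack payload, which makes the
    // serialized state more tolerant to struct evolution when a job is resumed later.
    let bytes = rmp_serde::to_vec_named(&state)?;
    let restored: SaveState = rmp_serde::from_slice(&bytes)?;
    assert_eq!(state, restored);

    Ok(())
}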
@@ -726,11 +891,15 @@ impl SerializableJob for Indexer { sub_path, metadata, iso_file_path_factory, - indexer_ruler_bytes: indexer_ruler.serialize().await?, + indexer_ruler, walker_root_path, ancestors_needing_indexing, ancestors_already_indexed, - paths_and_sizes, + iso_paths_and_sizes, + processing_first_directory, + to_create_buffer, + to_update_buffer, + errors, tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named( &tasks_for_shutdown .into_iter() @@ -765,7 +934,6 @@ impl SerializableJob for Indexer { .try_join() .await?, )?)), - errors, }) .map(Some) } @@ -779,17 +947,18 @@ impl SerializableJob for Indexer { sub_path, metadata, iso_file_path_factory, - indexer_ruler_bytes, + indexer_ruler, walker_root_path, ancestors_needing_indexing, ancestors_already_indexed, - paths_and_sizes, + iso_paths_and_sizes, + processing_first_directory, + to_create_buffer, + to_update_buffer, errors, tasks_for_shutdown_bytes, } = rmp_serde::from_slice::(serialized_job)?; - let indexer_ruler = IndexerRuler::deserialize(&indexer_ruler_bytes)?; - Ok(Some(( Self { location, @@ -800,7 +969,10 @@ impl SerializableJob for Indexer { walker_root_path, ancestors_needing_indexing, ancestors_already_indexed, - iso_paths_and_sizes: paths_and_sizes, + iso_paths_and_sizes, + processing_first_directory, + to_create_buffer, + to_update_buffer, errors, pending_tasks_on_resume: Vec::new(), tasks_for_shutdown: Vec::new(), diff --git a/core/crates/heavy-lifting/src/indexer/shallow.rs b/core/crates/heavy-lifting/src/indexer/shallow.rs index c238998ffb4c..aadf6662a51b 100644 --- a/core/crates/heavy-lifting/src/indexer/shallow.rs +++ b/core/crates/heavy-lifting/src/indexer/shallow.rs @@ -55,6 +55,7 @@ pub async fn shallow( to_create, to_update, to_remove, + non_indexed_paths, mut errors, directory_iso_file_path, total_size, @@ -71,6 +72,9 @@ pub async fn shallow( return Ok(vec![]); }; + // TODO use non_indexed_paths here in the future, sending it to frontend, showing then alongside the indexed files from db + debug!("Non indexed paths count: {}", non_indexed_paths.len()); + let removed_count = remove_non_existing_file_paths(to_remove, db, sync).await?; let Some(Metadata { diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs new file mode 100644 index 000000000000..f12baf8c21b2 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs @@ -0,0 +1,91 @@ +use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData}; + +use sd_prisma::prisma::file_path; + +use std::{ + hash::{Hash, Hasher}, + path::{Path, PathBuf}, +}; + +use serde::{Deserialize, Serialize}; +use uuid::Uuid; + +/// `WalkedEntry` represents a single path in the filesystem +#[derive(Debug, Serialize, Deserialize)] +pub struct WalkedEntry { + pub pub_id: Uuid, + pub maybe_object_id: file_path::object_id::Type, + pub iso_file_path: IsolatedFilePathData<'static>, + pub metadata: FilePathMetadata, +} + +impl PartialEq for WalkedEntry { + fn eq(&self, other: &Self) -> bool { + self.iso_file_path == other.iso_file_path + } +} + +impl Eq for WalkedEntry {} + +impl Hash for WalkedEntry { + fn hash(&self, state: &mut H) { + self.iso_file_path.hash(state); + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub(super) struct WalkingEntry { + pub(super) iso_file_path: IsolatedFilePathData<'static>, + pub(super) metadata: FilePathMetadata, +} + +impl From for WalkedEntry { + fn from( + WalkingEntry { + iso_file_path, + metadata, + }: WalkingEntry, + ) 
-> Self { + Self { + pub_id: Uuid::new_v4(), + maybe_object_id: None, + iso_file_path, + metadata, + } + } +} + +impl From<(Uuid, file_path::object_id::Type, WalkingEntry)> for WalkedEntry { + fn from( + ( + pub_id, + maybe_object_id, + WalkingEntry { + iso_file_path, + metadata, + }, + ): (Uuid, file_path::object_id::Type, WalkingEntry), + ) -> Self { + Self { + pub_id, + maybe_object_id, + iso_file_path, + metadata, + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct ToWalkEntry { + pub(super) path: PathBuf, + pub(super) parent_dir_accepted_by_its_children: Option, +} + +impl> From
for ToWalkEntry { + fn from(path: P) -> Self { + Self { + path: path.as_ref().into(), + parent_dir_accepted_by_its_children: None, + } + } +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/metadata.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/metadata.rs new file mode 100644 index 000000000000..31ad443fc012 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/metadata.rs @@ -0,0 +1,64 @@ +use crate::indexer; + +use sd_core_file_path_helper::FilePathMetadata; +use sd_core_indexer_rules::MetadataForIndexerRules; + +use std::{fs::Metadata, path::Path}; + +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize)] +pub(super) struct InnerMetadata { + pub is_dir: bool, + pub is_symlink: bool, + pub inode: u64, + pub size_in_bytes: u64, + pub hidden: bool, + pub created_at: DateTime, + pub modified_at: DateTime, +} + +impl InnerMetadata { + pub fn new( + path: impl AsRef, + metadata: &Metadata, + ) -> Result { + let FilePathMetadata { + inode, + size_in_bytes, + created_at, + modified_at, + hidden, + } = FilePathMetadata::from_path(path, metadata) + .map_err(|e| indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()))?; + + Ok(Self { + is_dir: metadata.is_dir(), + is_symlink: metadata.is_symlink(), + inode, + size_in_bytes, + hidden, + created_at, + modified_at, + }) + } +} + +impl MetadataForIndexerRules for InnerMetadata { + fn is_dir(&self) -> bool { + self.is_dir + } +} + +impl From for FilePathMetadata { + fn from(metadata: InnerMetadata) -> Self { + Self { + inode: metadata.inode, + size_in_bytes: metadata.size_in_bytes, + hidden: metadata.hidden, + created_at: metadata.created_at, + modified_at: metadata.modified_at, + } + } +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs similarity index 67% rename from core/crates/heavy-lifting/src/indexer/tasks/walker.rs rename to core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs index 5e568fde7394..b74ba5270677 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs @@ -1,4 +1,10 @@ -use crate::{indexer, Error, NonCriticalError}; +use crate::{ + indexer::{ + self, + tasks::walker::rules::{apply_indexer_rules, process_rules_results}, + }, + Error, NonCriticalError, +}; use sd_core_file_path_helper::{FilePathError, FilePathMetadata, IsolatedFilePathData}; use sd_core_indexer_rules::{ @@ -9,95 +15,36 @@ use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker}; use sd_prisma::prisma::file_path; use sd_task_system::{ - check_interruption, BaseTaskDispatcher, ExecStatus, Interrupter, IntoAnyTaskOutput, - SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId, + check_interruption, BaseTaskDispatcher, ExecStatus, Interrupter, IntoAnyTaskOutput, Task, + TaskDispatcher, TaskHandle, TaskId, }; use sd_utils::{db::inode_from_db, error::FileIOError}; use std::{ - collections::{hash_map::Entry, HashMap, HashSet}, + collections::{HashMap, HashSet}, fmt, - fs::Metadata, future::Future, - hash::{Hash, Hasher}, mem, path::{Path, PathBuf}, sync::Arc, time::Duration, }; -use chrono::{DateTime, Duration as ChronoDuration, FixedOffset, Utc}; +use chrono::{DateTime, Duration as ChronoDuration, FixedOffset}; use futures_concurrency::future::Join; -use serde::{Deserialize, Serialize}; use tokio::{fs, time::Instant}; use tokio_stream::{wrappers::ReadDirStream, StreamExt}; -use 
tracing::trace; -use uuid::Uuid; - -/// `WalkedEntry` represents a single path in the filesystem -#[derive(Debug, Serialize, Deserialize)] -pub struct WalkedEntry { - pub pub_id: Uuid, - pub maybe_object_id: file_path::object_id::Type, - pub iso_file_path: IsolatedFilePathData<'static>, - pub metadata: FilePathMetadata, -} +use tracing::{instrument, trace}; -impl PartialEq for WalkedEntry { - fn eq(&self, other: &Self) -> bool { - self.iso_file_path == other.iso_file_path - } -} - -impl Eq for WalkedEntry {} +mod entry; +mod metadata; +mod rules; +mod save_state; -impl Hash for WalkedEntry { - fn hash(&self, state: &mut H) { - self.iso_file_path.hash(state); - } -} - -#[derive(Debug, Serialize, Deserialize)] -struct WalkingEntry { - iso_file_path: IsolatedFilePathData<'static>, - metadata: FilePathMetadata, -} - -impl From for WalkedEntry { - fn from( - WalkingEntry { - iso_file_path, - metadata, - }: WalkingEntry, - ) -> Self { - Self { - pub_id: Uuid::new_v4(), - maybe_object_id: None, - iso_file_path, - metadata, - } - } -} +pub use entry::{ToWalkEntry, WalkedEntry}; -impl From<(Uuid, file_path::object_id::Type, WalkingEntry)> for WalkedEntry { - fn from( - ( - pub_id, - maybe_object_id, - WalkingEntry { - iso_file_path, - metadata, - }, - ): (Uuid, file_path::object_id::Type, WalkingEntry), - ) -> Self { - Self { - pub_id, - maybe_object_id, - iso_file_path, - metadata, - } - } -} +use entry::WalkingEntry; +use metadata::InnerMetadata; pub trait IsoFilePathFactory: Clone + Send + Sync + fmt::Debug + 'static { fn build( @@ -122,240 +69,6 @@ pub trait WalkerDBProxy: Clone + Send + Sync + fmt::Debug + 'static { > + Send; } -#[derive(Debug, Serialize, Deserialize)] -pub struct ToWalkEntry { - path: PathBuf, - parent_dir_accepted_by_its_children: Option, -} - -impl> From
for ToWalkEntry { - fn from(path: P) -> Self { - Self { - path: path.as_ref().into(), - parent_dir_accepted_by_its_children: None, - } - } -} - -#[derive(Debug)] -pub struct WalkTaskOutput { - pub to_create: Vec, - pub to_update: Vec, - pub to_remove: Vec, - pub accepted_ancestors: HashSet, - pub errors: Vec, - pub directory_iso_file_path: IsolatedFilePathData<'static>, - pub total_size: u64, - pub handles: Vec>, - pub scan_time: Duration, -} - -#[derive(Debug, Serialize, Deserialize)] -struct InnerMetadata { - pub is_dir: bool, - pub is_symlink: bool, - pub inode: u64, - pub size_in_bytes: u64, - pub hidden: bool, - pub created_at: DateTime, - pub modified_at: DateTime, -} - -impl InnerMetadata { - fn new( - path: impl AsRef, - metadata: &Metadata, - ) -> Result { - let FilePathMetadata { - inode, - size_in_bytes, - created_at, - modified_at, - hidden, - } = FilePathMetadata::from_path(path, metadata) - .map_err(|e| indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()))?; - - Ok(Self { - is_dir: metadata.is_dir(), - is_symlink: metadata.is_symlink(), - inode, - size_in_bytes, - hidden, - created_at, - modified_at, - }) - } -} - -impl MetadataForIndexerRules for InnerMetadata { - fn is_dir(&self) -> bool { - self.is_dir - } -} - -impl From for FilePathMetadata { - fn from(metadata: InnerMetadata) -> Self { - Self { - inode: metadata.inode, - size_in_bytes: metadata.size_in_bytes, - hidden: metadata.hidden, - created_at: metadata.created_at, - modified_at: metadata.modified_at, - } - } -} - -#[derive(Debug)] -enum WalkerStage { - Start, - Walking { - read_dir_stream: ReadDirStream, - found_paths: Vec, - }, - CollectingMetadata { - found_paths: Vec, - }, - CheckingIndexerRules { - paths_and_metadatas: HashMap, - }, - ProcessingRulesResults { - paths_metadatas_and_acceptance: - HashMap>)>, - }, - GatheringFilePathsToRemove { - accepted_paths: HashMap, - maybe_to_keep_walking: Option>, - accepted_ancestors: HashSet, - }, - Finalize { - walking_entries: Vec, - accepted_ancestors: HashSet, - to_remove_entries: Vec, - maybe_to_keep_walking: Option>, - }, -} - -#[derive(Debug, Serialize, Deserialize)] -struct WalkDirSaveState { - id: TaskId, - entry: ToWalkEntry, - root: Arc, - entry_iso_file_path: IsolatedFilePathData<'static>, - stage: WalkerStageSaveState, - errors: Vec, - scan_time: Duration, - is_shallow: bool, -} - -#[derive(Debug, Serialize, Deserialize)] -enum WalkerStageSaveState { - Start, - CollectingMetadata { - found_paths: Vec, - }, - CheckingIndexerRules { - paths_and_metadatas: HashMap, - }, - ProcessingRulesResults { - paths_metadatas_and_acceptance: - HashMap>)>, - }, - GatheringFilePathsToRemove { - accepted_paths: HashMap, - maybe_to_keep_walking: Option>, - accepted_ancestors: HashSet, - }, - Finalize { - walking_entries: Vec, - accepted_ancestors: HashSet, - to_remove_entries: Vec, - maybe_to_keep_walking: Option>, - }, -} - -impl From for WalkerStageSaveState { - fn from(stage: WalkerStage) -> Self { - match stage { - // We can't store the current state of `ReadDirStream` so we start again from the beginning - WalkerStage::Start | WalkerStage::Walking { .. 
} => Self::Start, - WalkerStage::CollectingMetadata { found_paths } => { - Self::CollectingMetadata { found_paths } - } - WalkerStage::CheckingIndexerRules { - paths_and_metadatas, - } => Self::CheckingIndexerRules { - paths_and_metadatas, - }, - WalkerStage::ProcessingRulesResults { - paths_metadatas_and_acceptance, - } => Self::ProcessingRulesResults { - paths_metadatas_and_acceptance, - }, - WalkerStage::GatheringFilePathsToRemove { - accepted_paths, - maybe_to_keep_walking, - accepted_ancestors, - } => Self::GatheringFilePathsToRemove { - accepted_paths, - maybe_to_keep_walking, - accepted_ancestors, - }, - WalkerStage::Finalize { - walking_entries, - accepted_ancestors, - to_remove_entries, - maybe_to_keep_walking, - } => Self::Finalize { - walking_entries, - accepted_ancestors, - to_remove_entries, - maybe_to_keep_walking, - }, - } - } -} - -impl From for WalkerStage { - fn from(value: WalkerStageSaveState) -> Self { - match value { - WalkerStageSaveState::Start => Self::Start, - WalkerStageSaveState::CollectingMetadata { found_paths } => { - Self::CollectingMetadata { found_paths } - } - WalkerStageSaveState::CheckingIndexerRules { - paths_and_metadatas, - } => Self::CheckingIndexerRules { - paths_and_metadatas, - }, - WalkerStageSaveState::ProcessingRulesResults { - paths_metadatas_and_acceptance, - } => Self::ProcessingRulesResults { - paths_metadatas_and_acceptance, - }, - WalkerStageSaveState::GatheringFilePathsToRemove { - accepted_paths, - maybe_to_keep_walking, - accepted_ancestors, - } => Self::GatheringFilePathsToRemove { - accepted_paths, - maybe_to_keep_walking, - accepted_ancestors, - }, - WalkerStageSaveState::Finalize { - walking_entries, - accepted_ancestors, - to_remove_entries, - maybe_to_keep_walking, - } => Self::Finalize { - walking_entries, - accepted_ancestors, - to_remove_entries, - maybe_to_keep_walking, - }, - } - } -} - #[derive(Debug)] pub struct WalkDirTask> where @@ -377,133 +90,18 @@ where is_shallow: bool, } -impl WalkDirTask -where - DBProxy: WalkerDBProxy, - IsoPathFactory: IsoFilePathFactory, - Dispatcher: TaskDispatcher, -{ - pub fn new_deep( - entry: impl Into + Send, - root: Arc, - indexer_ruler: IndexerRuler, - iso_file_path_factory: IsoPathFactory, - db_proxy: DBProxy, - dispatcher: Dispatcher, - ) -> Result { - let entry = entry.into(); - Ok(Self { - id: TaskId::new_v4(), - root, - indexer_ruler, - entry_iso_file_path: iso_file_path_factory.build(&entry.path, true)?, - iso_file_path_factory, - db_proxy, - stage: WalkerStage::Start, - entry, - maybe_dispatcher: Some(dispatcher), - is_shallow: false, - errors: Vec::new(), - scan_time: Duration::ZERO, - }) - } -} - -impl WalkDirTask> -where - DBProxy: WalkerDBProxy, - IsoPathFactory: IsoFilePathFactory, -{ - pub fn new_shallow( - entry: impl Into + Send, - root: Arc, - indexer_ruler: IndexerRuler, - iso_file_path_factory: IsoPathFactory, - db_proxy: DBProxy, - ) -> Result { - let entry = entry.into(); - Ok(Self { - id: TaskId::new_v4(), - root, - indexer_ruler, - entry_iso_file_path: iso_file_path_factory.build(&entry.path, true)?, - iso_file_path_factory, - db_proxy, - stage: WalkerStage::Start, - entry, - maybe_dispatcher: None, - is_shallow: true, - errors: Vec::new(), - scan_time: Duration::ZERO, - }) - } -} - -impl SerializableTask - for WalkDirTask -where - DBProxy: WalkerDBProxy, - IsoPathFactory: IsoFilePathFactory, - Dispatcher: TaskDispatcher, -{ - type SerializeError = rmp_serde::encode::Error; - type DeserializeError = rmp_serde::decode::Error; - type DeserializeCtx = (IndexerRuler, 
DBProxy, IsoPathFactory, Dispatcher); - - async fn serialize(self) -> Result, Self::SerializeError> { - let Self { - id, - entry, - root, - entry_iso_file_path, - stage, - errors, - scan_time, - is_shallow, - .. - } = self; - rmp_serde::to_vec_named(&WalkDirSaveState { - id, - entry, - root, - entry_iso_file_path, - stage: stage.into(), - errors, - scan_time, - is_shallow, - }) - } - - async fn deserialize( - data: &[u8], - (indexer_ruler, db_proxy, iso_file_path_factory, dispatcher): Self::DeserializeCtx, - ) -> Result { - rmp_serde::from_slice(data).map( - |WalkDirSaveState { - id, - entry, - root, - entry_iso_file_path, - stage, - errors, - scan_time, - is_shallow, - }| Self { - id, - entry, - root, - entry_iso_file_path, - indexer_ruler, - iso_file_path_factory, - db_proxy, - stage: stage.into(), - maybe_dispatcher: is_shallow.then_some(dispatcher), - errors, - scan_time, - is_shallow, - }, - ) - } +#[derive(Debug)] +pub struct WalkTaskOutput { + pub to_create: Vec, + pub to_update: Vec, + pub to_remove: Vec, + pub non_indexed_paths: Vec, + pub accepted_ancestors: HashSet, + pub errors: Vec, + pub directory_iso_file_path: IsolatedFilePathData<'static>, + pub total_size: u64, + pub handles: Vec>, + pub scan_time: Duration, } #[async_trait::async_trait] @@ -523,7 +121,7 @@ where self.is_shallow } - #[allow(clippy::too_many_lines)] + #[instrument(skip(self, interrupter), fields(task_id = %self.id, walked_entry = %self.entry.path.display()))] async fn run(&mut self, interrupter: &Interrupter) -> Result { let Self { root, @@ -544,14 +142,23 @@ where let start_time = Instant::now(); - let (to_create, to_update, total_size, to_remove, accepted_ancestors, handles) = loop { + let ( + to_create, + to_update, + to_remove, + non_indexed_paths, + accepted_ancestors, + total_size, + handles, + ) = loop { match stage { WalkerStage::Start => { - if indexer_ruler.has_system(&GITIGNORE).await { + trace!("Preparing git indexer rules for walking root"); + if indexer_ruler.has_system(&GITIGNORE) { if let Some(rules) = GitIgnoreRules::get_rules_if_in_git_repo(root.as_ref(), path).await { - indexer_ruler.extend(rules.map(Into::into)).await; + indexer_ruler.extend(rules.map(Into::into)); } } @@ -566,16 +173,23 @@ where )?), found_paths: Vec::new(), }; + trace!("Starting to walk!"); } WalkerStage::Walking { read_dir_stream, found_paths, } => { + trace!("Walking..."); while let Some(res) = read_dir_stream.next().await { match res { Ok(dir_entry) => { found_paths.push(dir_entry.path()); + trace!( + new_path = %dir_entry.path().display(), + total_paths = found_paths.len(), + "Found path" + ); } Err(e) => { errors.push(NonCriticalError::Indexer( @@ -589,6 +203,8 @@ where check_interruption!(interrupter, start_time, scan_time); } + trace!(total_paths = found_paths.len(), "Finished walking!"); + *stage = WalkerStage::CollectingMetadata { found_paths: mem::take(found_paths), }; @@ -597,9 +213,11 @@ where } WalkerStage::CollectingMetadata { found_paths } => { + trace!("Collecting metadata for found paths"); *stage = WalkerStage::CheckingIndexerRules { paths_and_metadatas: collect_metadata(found_paths, errors).await, }; + trace!("Finished collecting metadata!"); check_interruption!(interrupter, start_time, scan_time); } @@ -607,6 +225,7 @@ where WalkerStage::CheckingIndexerRules { paths_and_metadatas, } => { + trace!("Checking indexer rules for found paths"); *stage = WalkerStage::ProcessingRulesResults { paths_metadatas_and_acceptance: apply_indexer_rules( paths_and_metadatas, @@ -615,6 +234,7 @@ where ) .await, }; + 
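Editor's note: the Start and Walking stages above stream directory entries with ReadDirStream, record failures as non-critical errors, and checkpoint between stages so the task can be paused and resumed. A stripped-down sketch of just the walking step is shown below; it assumes tokio with the usual rt/macros/fs features and tokio-stream with its fs feature, and the interruption checkpoints are elided.

use std::path::{Path, PathBuf};

use tokio::fs;
use tokio_stream::{wrappers::ReadDirStream, StreamExt};

// Stream one directory and collect its entry paths, mirroring the Walking stage.
async fn collect_dir(path: &Path) -> std::io::Result<Vec<PathBuf>> {
    let mut read_dir_stream = ReadDirStream::new(fs::read_dir(path).await?);
    let mut found_paths = Vec::new();

    while let Some(entry) = read_dir_stream.next().await {
        match entry {
            Ok(dir_entry) => found_paths.push(dir_entry.path()),
            // The real walker pushes these into a non-critical error list instead of failing.
            Err(e) => eprintln!("failed to read dir entry: {e}"),
        }
    }

    Ok(found_paths)
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let paths = collect_dir(Path::new(".")).await?;
    println!("found {} entries", paths.len());
    Ok(())
}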
trace!("Finished checking indexer rules!"); check_interruption!(interrupter, start_time, scan_time); } @@ -622,21 +242,32 @@ where WalkerStage::ProcessingRulesResults { paths_metadatas_and_acceptance, } => { + trace!("Processing rules results"); let mut maybe_to_keep_walking = maybe_dispatcher.is_some().then(Vec::new); - let (accepted_paths, accepted_ancestors) = process_rules_results( - root, - iso_file_path_factory, - *parent_dir_accepted_by_its_children, - paths_metadatas_and_acceptance, - &mut maybe_to_keep_walking, - errors, - ) - .await; + let (accepted_paths, accepted_ancestors, rejected_paths) = + process_rules_results( + root, + iso_file_path_factory, + *parent_dir_accepted_by_its_children, + paths_metadatas_and_acceptance, + &mut maybe_to_keep_walking, + self.is_shallow, + errors, + ) + .await; + trace!( + total_accepted_paths = accepted_paths.len(), + total_accepted_ancestors = accepted_ancestors.len(), + collect_rejected_paths = self.is_shallow, + total_rejected_paths = rejected_paths.len(), + "Finished processing rules results!" + ); *stage = WalkerStage::GatheringFilePathsToRemove { accepted_paths, maybe_to_keep_walking, accepted_ancestors, + non_indexed_paths: rejected_paths, }; check_interruption!(interrupter, start_time, scan_time); @@ -646,7 +277,9 @@ where accepted_paths, maybe_to_keep_walking, accepted_ancestors, + non_indexed_paths, } => { + trace!("Gathering file paths to remove"); let (walking_entries, to_remove_entries) = gather_file_paths_to_remove( accepted_paths, entry_iso_file_path, @@ -655,12 +288,14 @@ where errors, ) .await; + trace!("Finished gathering file paths to remove!"); *stage = WalkerStage::Finalize { walking_entries, to_remove_entries, maybe_to_keep_walking: mem::take(maybe_to_keep_walking), accepted_ancestors: mem::take(accepted_ancestors), + non_indexed_paths: mem::take(non_indexed_paths), }; check_interruption!(interrupter, start_time, scan_time); @@ -672,17 +307,27 @@ where to_remove_entries, maybe_to_keep_walking, accepted_ancestors, + non_indexed_paths, } => { + trace!("Segregating creates and updates"); let (to_create, to_update, total_size) = segregate_creates_and_updates(walking_entries, db_proxy).await?; + trace!( + total_to_create = to_create.len(), + total_to_update = to_update.len(), + total_to_remove = to_remove_entries.len(), + total_non_indexed_paths = non_indexed_paths.len(), + total_size, + "Finished segregating creates and updates!" 
+ ); let handles = keep_walking( root, indexer_ruler, iso_file_path_factory, db_proxy, - maybe_to_keep_walking, - maybe_dispatcher, + maybe_to_keep_walking.as_mut(), + maybe_dispatcher.as_ref(), errors, ) .await; @@ -690,9 +335,10 @@ where break ( to_create, to_update, - total_size, mem::take(to_remove_entries), + mem::take(non_indexed_paths), mem::take(accepted_ancestors), + total_size, handles, ); } @@ -707,6 +353,7 @@ where to_create, to_update, to_remove, + non_indexed_paths, accepted_ancestors, errors: mem::take(errors), directory_iso_file_path: mem::take(entry_iso_file_path), @@ -719,6 +366,100 @@ where } } +#[derive(Debug)] +enum WalkerStage { + Start, + Walking { + read_dir_stream: ReadDirStream, + found_paths: Vec, + }, + CollectingMetadata { + found_paths: Vec, + }, + CheckingIndexerRules { + paths_and_metadatas: HashMap, + }, + ProcessingRulesResults { + paths_metadatas_and_acceptance: + HashMap>)>, + }, + GatheringFilePathsToRemove { + accepted_paths: HashMap, + maybe_to_keep_walking: Option>, + accepted_ancestors: HashSet, + non_indexed_paths: Vec, + }, + Finalize { + walking_entries: Vec, + accepted_ancestors: HashSet, + to_remove_entries: Vec, + maybe_to_keep_walking: Option>, + non_indexed_paths: Vec, + }, +} + +impl WalkDirTask +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, + Dispatcher: TaskDispatcher, +{ + pub fn new_deep( + entry: impl Into + Send, + root: Arc, + indexer_ruler: IndexerRuler, + iso_file_path_factory: IsoPathFactory, + db_proxy: DBProxy, + dispatcher: Dispatcher, + ) -> Result { + let entry = entry.into(); + Ok(Self { + id: TaskId::new_v4(), + root, + indexer_ruler, + entry_iso_file_path: iso_file_path_factory.build(&entry.path, true)?, + iso_file_path_factory, + db_proxy, + stage: WalkerStage::Start, + entry, + maybe_dispatcher: Some(dispatcher), + is_shallow: false, + errors: Vec::new(), + scan_time: Duration::ZERO, + }) + } +} + +impl WalkDirTask> +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, +{ + pub fn new_shallow( + entry: impl Into + Send, + root: Arc, + indexer_ruler: IndexerRuler, + iso_file_path_factory: IsoPathFactory, + db_proxy: DBProxy, + ) -> Result { + let entry = entry.into(); + Ok(Self { + id: TaskId::new_v4(), + root, + indexer_ruler, + entry_iso_file_path: iso_file_path_factory.build(&entry.path, true)?, + iso_file_path_factory, + db_proxy, + stage: WalkerStage::Start, + entry, + maybe_dispatcher: None, + is_shallow: true, + errors: Vec::new(), + scan_time: Duration::ZERO, + }) + } +} + async fn segregate_creates_and_updates( walking_entries: &mut Vec, db_proxy: &impl WalkerDBProxy, @@ -802,8 +543,8 @@ async fn keep_walking( indexer_ruler: &IndexerRuler, iso_file_path_factory: &impl IsoFilePathFactory, db_proxy: &impl WalkerDBProxy, - maybe_to_keep_walking: &mut Option>, - dispatcher: &Option>, + maybe_to_keep_walking: Option<&mut Vec>, + dispatcher: Option<&impl TaskDispatcher>, errors: &mut Vec, ) -> Vec> { if let (Some(dispatcher), Some(to_keep_walking)) = (dispatcher, maybe_to_keep_walking) { @@ -859,241 +600,6 @@ async fn collect_metadata( .collect() } -async fn apply_indexer_rules( - paths_and_metadatas: &mut HashMap, - indexer_ruler: &IndexerRuler, - errors: &mut Vec, -) -> HashMap>)> { - paths_and_metadatas - .drain() - // TODO: Hard ignoring symlinks for now, but this should be configurable - .filter(|(_, metadata)| !metadata.is_symlink) - .map(|(current_path, metadata)| async { - indexer_ruler - .apply_all(¤t_path, &metadata) - .await - .map(|acceptance_per_rule_kind| { - 
(current_path, (metadata, acceptance_per_rule_kind)) - }) - .map_err(|e| indexer::NonCriticalIndexerError::IndexerRule(e.to_string())) - }) - .collect::>() - .join() - .await - .into_iter() - .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) - .collect() -} - -async fn process_rules_results( - root: &Arc, - iso_file_path_factory: &impl IsoFilePathFactory, - parent_dir_accepted_by_its_children: Option, - paths_metadatas_and_acceptance: &mut HashMap< - PathBuf, - (InnerMetadata, HashMap>), - >, - maybe_to_keep_walking: &mut Option>, - errors: &mut Vec, -) -> (HashMap, HashSet) { - let root = root.as_ref(); - - let (accepted, accepted_ancestors) = paths_metadatas_and_acceptance.drain().fold( - (HashMap::new(), HashMap::new()), - |(mut accepted, mut accepted_ancestors), - (current_path, (metadata, acceptance_per_rule_kind))| { - // Accept by children has three states, - // None if we don't now yet or if this check doesn't apply - // Some(true) if this check applies and it passes - // Some(false) if this check applies and it was rejected - // and we pass the current parent state to its children - let mut accept_by_children_dir = parent_dir_accepted_by_its_children; - - if rejected_by_reject_glob(&acceptance_per_rule_kind) { - trace!( - "Path {} rejected by `RuleKind::RejectFilesByGlob`", - current_path.display() - ); - - return (accepted, accepted_ancestors); - } - - let is_dir = metadata.is_dir(); - - if is_dir - && process_and_maybe_reject_by_directory_rules( - ¤t_path, - &acceptance_per_rule_kind, - &mut accept_by_children_dir, - maybe_to_keep_walking, - ) { - trace!( - "Path {} rejected by rule `RuleKind::RejectIfChildrenDirectoriesArePresent`", - current_path.display(), - ); - return (accepted, accepted_ancestors); - } - - if rejected_by_accept_glob(&acceptance_per_rule_kind) { - trace!( - "Path {} reject because it didn't passed in any AcceptFilesByGlob rules", - current_path.display() - ); - return (accepted, accepted_ancestors); - } - - if accept_by_children_dir.unwrap_or(true) { - accept_ancestors( - current_path, - metadata, - root, - &mut accepted, - iso_file_path_factory, - &mut accepted_ancestors, - errors, - ); - } - - (accepted, accepted_ancestors) - }, - ); - - ( - accepted, - accepted_ancestors - .into_iter() - .map(|(ancestor_iso_file_path, ancestor_path)| async move { - fs::metadata(&ancestor_path) - .await - .map_err(|e| { - indexer::NonCriticalIndexerError::Metadata( - FileIOError::from((&ancestor_path, e)).to_string(), - ) - }) - .and_then(|metadata| { - FilePathMetadata::from_path(&ancestor_path, &metadata) - .map(|metadata| { - WalkingEntry { - iso_file_path: ancestor_iso_file_path, - metadata, - } - .into() - }) - .map_err(|e| { - indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()) - }) - }) - }) - .collect::>() - .join() - .await - .into_iter() - .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) - .collect(), - ) -} - -fn process_and_maybe_reject_by_directory_rules( - current_path: &Path, - acceptance_per_rule_kind: &HashMap>, - accept_by_children_dir: &mut Option, - maybe_to_keep_walking: &mut Option>, -) -> bool { - // If it is a directory, first we check if we must reject it and its children entirely - if rejected_by_children_directories(acceptance_per_rule_kind) { - return true; - } - - // Then we check if we must accept it and its children - if let Some(accepted_by_children_rules) = - acceptance_per_rule_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent) - { - if accepted_by_children_rules.iter().any(|accept| 
*accept) { - *accept_by_children_dir = Some(true); - } - - // If it wasn't accepted then we mark as rejected - if accept_by_children_dir.is_none() { - trace!( - "Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule", - current_path.display() - ); - *accept_by_children_dir = Some(false); - } - } - - // Then we mark this directory to maybe be walked in too - if let Some(ref mut to_keep_walking) = maybe_to_keep_walking { - to_keep_walking.push(ToWalkEntry { - path: current_path.to_path_buf(), - parent_dir_accepted_by_its_children: *accept_by_children_dir, - }); - } - - false -} - -fn accept_ancestors( - current_path: PathBuf, - metadata: InnerMetadata, - root: &Path, - accepted: &mut HashMap, - iso_file_path_factory: &impl IsoFilePathFactory, - accepted_ancestors: &mut HashMap, PathBuf>, - errors: &mut Vec, -) { - // If the ancestors directories wasn't indexed before, now we do - for ancestor in current_path - .ancestors() - .skip(1) // Skip the current directory as it was already indexed - .take_while(|&ancestor| ancestor != root) - { - if let Ok(iso_file_path) = iso_file_path_factory.build(ancestor, true).map_err(|e| { - errors.push(indexer::NonCriticalIndexerError::IsoFilePath(e.to_string()).into()); - }) { - match accepted_ancestors.entry(iso_file_path) { - Entry::Occupied(_) => { - // If we already accepted this ancestor, then it will contain - // also all if its ancestors too, so we can stop here - break; - } - Entry::Vacant(entry) => { - trace!("Accepted ancestor {}", ancestor.display()); - entry.insert(ancestor.to_path_buf()); - } - } - } - } - - accepted.insert(current_path, metadata); -} - -fn rejected_by_accept_glob(acceptance_per_rule_kind: &HashMap>) -> bool { - acceptance_per_rule_kind - .get(&RuleKind::AcceptFilesByGlob) - .map_or(false, |accept_rules| { - accept_rules.iter().all(|accept| !accept) - }) -} - -fn rejected_by_children_directories( - acceptance_per_rule_kind: &HashMap>, -) -> bool { - acceptance_per_rule_kind - .get(&RuleKind::RejectIfChildrenDirectoriesArePresent) - .map_or(false, |reject_results| { - reject_results.iter().any(|reject| !reject) - }) -} - -fn rejected_by_reject_glob(acceptance_per_rule_kind: &HashMap>) -> bool { - acceptance_per_rule_kind - .get(&RuleKind::RejectFilesByGlob) - .map_or(false, |reject_results| { - reject_results.iter().any(|reject| !reject) - }) -} - async fn gather_file_paths_to_remove( accepted_paths: &mut HashMap, entry_iso_file_path: &IsolatedFilePathData<'_>, @@ -1152,6 +658,7 @@ mod tests { use tokio::{fs, io::AsyncWriteExt}; use tracing::debug; use tracing_test::traced_test; + use uuid::Uuid; #[derive(Debug, Clone)] struct DummyIsoPathFactory { @@ -1203,6 +710,7 @@ mod tests { } } + #[allow(clippy::cognitive_complexity)] async fn prepare_location() -> TempDir { // root // |__ rust_project diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/rules.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/rules.rs new file mode 100644 index 000000000000..069515c4fcc4 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/rules.rs @@ -0,0 +1,319 @@ +use crate::{indexer, NonCriticalError}; + +use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData}; +use sd_core_indexer_rules::{IndexerRuler, MetadataForIndexerRules, RuleKind}; + +use sd_utils::error::FileIOError; + +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + path::{Path, PathBuf}, + sync::Arc, +}; + +use futures_concurrency::future::Join; +use tokio::fs; +use tracing::{instrument, 
trace}; + +use super::{ + entry::{ToWalkEntry, WalkingEntry}, + InnerMetadata, IsoFilePathFactory, WalkedEntry, +}; + +pub(super) async fn apply_indexer_rules( + paths_and_metadatas: &mut HashMap, + indexer_ruler: &IndexerRuler, + errors: &mut Vec, +) -> HashMap>)> { + paths_and_metadatas + .drain() + // TODO: Hard ignoring symlinks for now, but this should be configurable + .filter(|(_, metadata)| !metadata.is_symlink) + .map(|(current_path, metadata)| async { + indexer_ruler + .apply_all(¤t_path, &metadata) + .await + .map(|acceptance_per_rule_kind| { + (current_path, (metadata, acceptance_per_rule_kind)) + }) + .map_err(|e| indexer::NonCriticalIndexerError::IndexerRule(e.to_string())) + }) + .collect::>() + .join() + .await + .into_iter() + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) + .collect() +} + +pub(super) async fn process_rules_results( + root: &Arc, + iso_file_path_factory: &impl IsoFilePathFactory, + parent_dir_accepted_by_its_children: Option, + paths_metadatas_and_acceptance: &mut HashMap< + PathBuf, + (InnerMetadata, HashMap>), + >, + maybe_to_keep_walking: &mut Option>, + collect_rejected_paths: bool, + errors: &mut Vec, +) -> ( + HashMap, + HashSet, + Vec, +) { + let (accepted, accepted_ancestors, rejected) = segregate_paths( + root, + iso_file_path_factory, + paths_metadatas_and_acceptance.drain(), + parent_dir_accepted_by_its_children, + maybe_to_keep_walking, + collect_rejected_paths, + errors, + ); + + ( + accepted, + accepted_ancestors + .into_iter() + .map(|(ancestor_iso_file_path, ancestor_path)| async move { + fs::metadata(&ancestor_path) + .await + .map_err(|e| { + indexer::NonCriticalIndexerError::Metadata( + FileIOError::from((&ancestor_path, e)).to_string(), + ) + }) + .and_then(|metadata| { + FilePathMetadata::from_path(&ancestor_path, &metadata) + .map(|metadata| { + WalkingEntry { + iso_file_path: ancestor_iso_file_path, + metadata, + } + .into() + }) + .map_err(|e| { + indexer::NonCriticalIndexerError::FilePathMetadata(e.to_string()) + }) + }) + }) + .collect::>() + .join() + .await + .into_iter() + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) + .collect(), + rejected, + ) +} + +fn segregate_paths( + root: &Arc, + iso_file_path_factory: &impl IsoFilePathFactory, + paths_metadatas_and_acceptance: impl IntoIterator< + Item = (PathBuf, (InnerMetadata, HashMap>)), + >, + parent_dir_accepted_by_its_children: Option, + maybe_to_keep_walking: &mut Option>, + collect_rejected_paths: bool, + errors: &mut Vec, +) -> ( + HashMap, + HashMap, PathBuf>, + Vec, +) { + let root = root.as_ref(); + + let mut accepted = HashMap::new(); + let mut accepted_ancestors = HashMap::new(); + let mut rejected = Vec::new(); + + for (current_path, (metadata, acceptance_per_rule_kind)) in paths_metadatas_and_acceptance { + // Accept by children has three states, + // None if we don't now yet or if this check doesn't apply + // Some(true) if this check applies and it passes + // Some(false) if this check applies and it was rejected + // and we pass the current parent state to its children + let mut accept_by_children_dir = parent_dir_accepted_by_its_children; + + if !reject_path( + ¤t_path, + &metadata, + &acceptance_per_rule_kind, + &mut accept_by_children_dir, + maybe_to_keep_walking, + ) && accept_by_children_dir.unwrap_or(true) + { + accept_path_and_ancestors( + current_path, + metadata, + root, + &mut accepted, + iso_file_path_factory, + &mut accepted_ancestors, + errors, + ); + + continue; + } + + if collect_rejected_paths { + 
rejected.push(current_path); + } + } + + (accepted, accepted_ancestors, rejected) +} + +#[instrument(skip_all, fields(current_path = %current_path.display()))] +fn reject_path( + current_path: &Path, + metadata: &InnerMetadata, + acceptance_per_rule_kind: &HashMap>, + accept_by_children_dir: &mut Option, + maybe_to_keep_walking: &mut Option>, +) -> bool { + rejected_by_reject_glob(acceptance_per_rule_kind) + || rejected_by_git_ignore(acceptance_per_rule_kind) + || (metadata.is_dir() + && process_and_maybe_reject_by_directory_rules( + current_path, + acceptance_per_rule_kind, + accept_by_children_dir, + maybe_to_keep_walking, + )) || rejected_by_accept_glob(acceptance_per_rule_kind) +} + +fn process_and_maybe_reject_by_directory_rules( + current_path: &Path, + acceptance_per_rule_kind: &HashMap>, + accept_by_children_dir: &mut Option, + maybe_to_keep_walking: &mut Option>, +) -> bool { + // If it is a directory, first we check if we must reject it and its children entirely + if rejected_by_children_directories(acceptance_per_rule_kind) { + return true; + } + + // Then we check if we must accept it and its children + if let Some(accepted_by_children_rules) = + acceptance_per_rule_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent) + { + if accepted_by_children_rules.iter().any(|accept| *accept) { + *accept_by_children_dir = Some(true); + } + + // If it wasn't accepted then we mark as rejected + if accept_by_children_dir.is_none() { + trace!( + "Rejected because it didn't passed in any \ + `RuleKind::AcceptIfChildrenDirectoriesArePresent` rule", + ); + *accept_by_children_dir = Some(false); + } + } + + // Then we mark this directory to maybe be walked in too + if let Some(ref mut to_keep_walking) = maybe_to_keep_walking { + to_keep_walking.push(ToWalkEntry { + path: current_path.to_path_buf(), + parent_dir_accepted_by_its_children: *accept_by_children_dir, + }); + } + + false +} + +fn accept_path_and_ancestors( + current_path: PathBuf, + metadata: InnerMetadata, + root: &Path, + accepted: &mut HashMap, + iso_file_path_factory: &impl IsoFilePathFactory, + accepted_ancestors: &mut HashMap, PathBuf>, + errors: &mut Vec, +) { + // If the ancestors directories wasn't indexed before, now we do + for ancestor in current_path + .ancestors() + .skip(1) // Skip the current directory as it was already indexed + .take_while(|&ancestor| ancestor != root) + { + if let Ok(iso_file_path) = iso_file_path_factory.build(ancestor, true).map_err(|e| { + errors.push(indexer::NonCriticalIndexerError::IsoFilePath(e.to_string()).into()); + }) { + match accepted_ancestors.entry(iso_file_path) { + Entry::Occupied(_) => { + // If we already accepted this ancestor, then it will contain + // also all if its ancestors too, so we can stop here + break; + } + Entry::Vacant(entry) => { + trace!("Accepted ancestor {}", ancestor.display()); + entry.insert(ancestor.to_path_buf()); + } + } + } + } + + accepted.insert(current_path, metadata); +} + +fn rejected_by_accept_glob(acceptance_per_rule_kind: &HashMap>) -> bool { + let res = acceptance_per_rule_kind + .get(&RuleKind::AcceptFilesByGlob) + .map_or(false, |accept_rules| { + accept_rules.iter().all(|accept| !accept) + }); + + if res { + trace!("Reject because it didn't passed in any `RuleKind::AcceptFilesByGlob` rules"); + } + + res +} + +fn rejected_by_children_directories( + acceptance_per_rule_kind: &HashMap>, +) -> bool { + let res = acceptance_per_rule_kind + .get(&RuleKind::RejectIfChildrenDirectoriesArePresent) + .map_or(false, |reject_results| { + 
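Taken together, the rejection helpers in this module reduce a per-path `HashMap<RuleKind, Vec<bool>>` to a keep/drop decision: reject-glob and git-ignore rules drop the path on any `false` result, while accept-globs drop it only when no accept rule matched at all. A self-contained sketch of that reduction, using a simplified local `RuleKind` stand-in rather than the real `sd_core_indexer_rules` enum:

use std::collections::HashMap;

// Hypothetical stand-in for sd_core_indexer_rules::RuleKind, for illustration only.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum RuleKind {
    AcceptFilesByGlob,
    RejectFilesByGlob,
    IgnoredByGit,
}

// Returns `true` when the path should be kept: any `false` from a reject-style
// rule drops it, while accept-globs only drop it when no accept rule matched.
fn keep_path(acceptance: &HashMap<RuleKind, Vec<bool>>) -> bool {
    let rejected_by = |kind: RuleKind| {
        acceptance
            .get(&kind)
            .map_or(false, |results| results.iter().any(|ok| !ok))
    };
    let rejected_by_accept_glob = acceptance
        .get(&RuleKind::AcceptFilesByGlob)
        .map_or(false, |results| results.iter().all(|ok| !ok));

    !(rejected_by(RuleKind::RejectFilesByGlob)
        || rejected_by(RuleKind::IgnoredByGit)
        || rejected_by_accept_glob)
}

fn main() {
    let mut acceptance = HashMap::new();
    acceptance.insert(RuleKind::AcceptFilesByGlob, vec![false, true]); // one accept glob matched
    acceptance.insert(RuleKind::RejectFilesByGlob, vec![true]); // no reject glob matched
    assert!(keep_path(&acceptance));

    acceptance.insert(RuleKind::IgnoredByGit, vec![false]); // ignored by a .gitignore pattern
    assert!(!keep_path(&acceptance));
}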
reject_results.iter().any(|reject| !reject) + }); + + if res { + trace!("Rejected by rule `RuleKind::RejectIfChildrenDirectoriesArePresent`"); + } + + res +} + +fn rejected_by_reject_glob(acceptance_per_rule_kind: &HashMap>) -> bool { + let res = acceptance_per_rule_kind + .get(&RuleKind::RejectFilesByGlob) + .map_or(false, |reject_results| { + reject_results.iter().any(|reject| !reject) + }); + + if res { + trace!("Rejected by `RuleKind::RejectFilesByGlob`"); + } + + res +} + +fn rejected_by_git_ignore(acceptance_per_rule_kind: &HashMap>) -> bool { + let res = acceptance_per_rule_kind + .get(&RuleKind::IgnoredByGit) + .map_or(false, |reject_results| { + reject_results.iter().any(|reject| !reject) + }); + + if res { + trace!("Rejected by `RuleKind::IgnoredByGit`"); + } + + res +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs new file mode 100644 index 000000000000..50d86c9673b7 --- /dev/null +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs @@ -0,0 +1,219 @@ +use crate::{Error, NonCriticalError}; + +use sd_core_file_path_helper::IsolatedFilePathData; +use sd_core_indexer_rules::{IndexerRuler, RuleKind}; +use sd_core_prisma_helpers::file_path_pub_and_cas_ids; + +use std::{ + collections::{HashMap, HashSet}, + path::PathBuf, + sync::Arc, + time::Duration, +}; + +use sd_task_system::{SerializableTask, TaskDispatcher, TaskId}; +use serde::{Deserialize, Serialize}; + +use super::{ + entry::{ToWalkEntry, WalkingEntry}, + metadata::InnerMetadata, + IsoFilePathFactory, WalkDirTask, WalkedEntry, WalkerDBProxy, WalkerStage, +}; + +#[derive(Debug, Serialize, Deserialize)] +pub(super) struct WalkDirSaveState { + id: TaskId, + entry: ToWalkEntry, + root: Arc, + entry_iso_file_path: IsolatedFilePathData<'static>, + stage: WalkerStageSaveState, + errors: Vec, + scan_time: Duration, + is_shallow: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +pub(super) enum WalkerStageSaveState { + Start, + CollectingMetadata { + found_paths: Vec, + }, + CheckingIndexerRules { + paths_and_metadatas: HashMap, + }, + ProcessingRulesResults { + paths_metadatas_and_acceptance: + HashMap>)>, + }, + GatheringFilePathsToRemove { + accepted_paths: HashMap, + maybe_to_keep_walking: Option>, + accepted_ancestors: HashSet, + non_indexed_paths: Vec, + }, + Finalize { + walking_entries: Vec, + accepted_ancestors: HashSet, + to_remove_entries: Vec, + maybe_to_keep_walking: Option>, + non_indexed_paths: Vec, + }, +} + +impl From for WalkerStageSaveState { + fn from(stage: WalkerStage) -> Self { + match stage { + // We can't store the current state of `ReadDirStream` so we start again from the beginning + WalkerStage::Start | WalkerStage::Walking { .. 
} => Self::Start, + WalkerStage::CollectingMetadata { found_paths } => { + Self::CollectingMetadata { found_paths } + } + WalkerStage::CheckingIndexerRules { + paths_and_metadatas, + } => Self::CheckingIndexerRules { + paths_and_metadatas, + }, + WalkerStage::ProcessingRulesResults { + paths_metadatas_and_acceptance, + } => Self::ProcessingRulesResults { + paths_metadatas_and_acceptance, + }, + WalkerStage::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + non_indexed_paths, + } => Self::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + non_indexed_paths, + }, + WalkerStage::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + non_indexed_paths, + } => Self::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + non_indexed_paths, + }, + } + } +} + +impl From for WalkerStage { + fn from(value: WalkerStageSaveState) -> Self { + match value { + WalkerStageSaveState::Start => Self::Start, + WalkerStageSaveState::CollectingMetadata { found_paths } => { + Self::CollectingMetadata { found_paths } + } + WalkerStageSaveState::CheckingIndexerRules { + paths_and_metadatas, + } => Self::CheckingIndexerRules { + paths_and_metadatas, + }, + WalkerStageSaveState::ProcessingRulesResults { + paths_metadatas_and_acceptance, + } => Self::ProcessingRulesResults { + paths_metadatas_and_acceptance, + }, + WalkerStageSaveState::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + non_indexed_paths, + } => Self::GatheringFilePathsToRemove { + accepted_paths, + maybe_to_keep_walking, + accepted_ancestors, + non_indexed_paths, + }, + WalkerStageSaveState::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + non_indexed_paths, + } => Self::Finalize { + walking_entries, + accepted_ancestors, + to_remove_entries, + maybe_to_keep_walking, + non_indexed_paths, + }, + } + } +} + +impl SerializableTask + for WalkDirTask +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, + Dispatcher: TaskDispatcher, +{ + type SerializeError = rmp_serde::encode::Error; + type DeserializeError = rmp_serde::decode::Error; + type DeserializeCtx = (IndexerRuler, DBProxy, IsoPathFactory, Dispatcher); + + async fn serialize(self) -> Result, Self::SerializeError> { + let Self { + id, + entry, + root, + entry_iso_file_path, + stage, + errors, + scan_time, + is_shallow, + .. 
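The save state above is persisted with `rmp_serde`'s named MessagePack encoding. A tiny round trip under the same pair of calls, with a made-up `DemoSaveState` in place of the real `WalkDirSaveState`:

use serde::{Deserialize, Serialize};
use std::time::Duration;

// Hypothetical miniature of a task save state, just to exercise the encoding.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct DemoSaveState {
    id: u64,
    errors: Vec<String>,
    scan_time: Duration,
    is_shallow: bool,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let state = DemoSaveState {
        id: 42,
        errors: vec!["metadata read failed".into()],
        scan_time: Duration::from_millis(150),
        is_shallow: false,
    };

    // `to_vec_named` keeps field names in the payload, which makes the saved
    // bytes more tolerant of struct evolution than positional encoding.
    let bytes = rmp_serde::to_vec_named(&state)?;
    let restored: DemoSaveState = rmp_serde::from_slice(&bytes)?;
    assert_eq!(state, restored);
    Ok(())
}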
+ } = self; + rmp_serde::to_vec_named(&WalkDirSaveState { + id, + entry, + root, + entry_iso_file_path, + stage: stage.into(), + errors, + scan_time, + is_shallow, + }) + } + + async fn deserialize( + data: &[u8], + (indexer_ruler, db_proxy, iso_file_path_factory, dispatcher): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |WalkDirSaveState { + id, + entry, + root, + entry_iso_file_path, + stage, + errors, + scan_time, + is_shallow, + }| Self { + id, + entry, + root, + entry_iso_file_path, + indexer_ruler, + iso_file_path_factory, + db_proxy, + stage: stage.into(), + maybe_dispatcher: is_shallow.then_some(dispatcher), + errors, + scan_time, + is_shallow, + }, + ) + } +} diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index 1a0ccc21ef0c..73ed518b00f2 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -148,25 +148,25 @@ impl Task for Thumbnailer { output.total_time += start.elapsed(); - #[allow(clippy::cast_precision_loss)] - // SAFETY: we're probably won't have 2^52 thumbnails being generated on a single task for this cast to have - // a precision loss issue - let total = (output.generated + output.skipped) as f64; - - let mean_generation_time = output.mean_time_acc / total; - - let generation_time_std_dev = Duration::from_secs_f64( - (mean_generation_time.mul_add(-mean_generation_time, output.std_dev_acc / total)) - .sqrt(), - ); - - trace!( - "{{generated: {generated}, skipped: {skipped}}} thumbnails; \ - mean generation time: {mean_generation_time:?} ± {generation_time_std_dev:?}", - generated = output.generated, - skipped = output.skipped, - mean_generation_time = Duration::from_secs_f64(mean_generation_time) - ); + if output.generated > 1 { + #[allow(clippy::cast_precision_loss)] + // SAFETY: we're probably won't have 2^52 thumbnails being generated on a single task for this cast to have + // a precision loss issue + let total = (output.generated + output.skipped) as f64; + let mean_generation_time_f64 = output.mean_time_acc / total; + + trace!( + generated = output.generated, + skipped = output.skipped, + "mean generation time: {mean_generation_time:?} ± {generation_time_std_dev:?}", + mean_generation_time = Duration::from_secs_f64(mean_generation_time_f64), + generation_time_std_dev = Duration::from_secs_f64( + (mean_generation_time_f64 + .mul_add(-mean_generation_time_f64, output.std_dev_acc / total)) + .sqrt(), + ) + ); + } Ok(ExecStatus::Done(mem::take(output).into_output())) } diff --git a/core/crates/indexer-rules/src/lib.rs b/core/crates/indexer-rules/src/lib.rs index e74198e4e2cc..2b934104e585 100644 --- a/core/crates/indexer-rules/src/lib.rs +++ b/core/crates/indexer-rules/src/lib.rs @@ -294,7 +294,7 @@ impl RulePerKind { )), Self::IgnoredByGit(git_repo, patterns) => Ok(( RuleKind::IgnoredByGit, - accept_by_gitpattern(source.as_ref(), git_repo, patterns), + accept_by_git_pattern(source.as_ref(), git_repo, patterns), )), } } @@ -326,13 +326,13 @@ impl RulePerKind { )), Self::IgnoredByGit(base_dir, patterns) => Ok(( RuleKind::IgnoredByGit, - accept_by_gitpattern(source.as_ref(), base_dir, patterns), + accept_by_git_pattern(source.as_ref(), base_dir, patterns), )), } } } -fn accept_by_gitpattern(source: &Path, base_dir: &Path, search: &Search) -> bool { +fn accept_by_git_pattern(source: &Path, base_dir: &Path, search: &Search) -> bool { let relative 
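The thumbnailer accumulates a sum of generation times (`mean_time_acc`) and a sum of their squares (`std_dev_acc`), then derives the mean and the standard deviation as sqrt(E[X^2] - E[X]^2). The same arithmetic on plain `f64` seconds, detached from the task types:

// Sketch of the mean / standard-deviation bookkeeping used above:
// accumulate the sum and the sum of squares, then derive both stats at the end.
fn mean_and_std_dev(samples_secs: &[f64]) -> (f64, f64) {
    let total = samples_secs.len() as f64;
    let mean_acc: f64 = samples_secs.iter().sum();
    let std_dev_acc: f64 = samples_secs.iter().map(|s| s * s).sum();

    let mean = mean_acc / total;
    // E[X^2] - E[X]^2, written with `mul_add` like the task code.
    let variance = mean.mul_add(-mean, std_dev_acc / total);
    (mean, variance.sqrt())
}

fn main() {
    let (mean, std_dev) = mean_and_std_dev(&[0.12, 0.09, 0.15, 0.11]);
    println!("mean generation time: {mean:.3}s ± {std_dev:.3}s");
}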
= source .strip_prefix(base_dir) .expect("`base_dir` should be our git repo, and `source` should be inside of it"); @@ -414,39 +414,44 @@ impl IndexerRule { } } -#[derive(Debug, Clone, Default)] +#[derive(Debug, Default, Serialize, Deserialize)] pub struct IndexerRuler { - rules: Arc>>, + base: Arc>, + extra: Vec, +} + +impl Clone for IndexerRuler { + fn clone(&self) -> Self { + Self { + base: Arc::clone(&self.base), + // Each instance of IndexerRules MUST have its own extra rules no clones allowed! + extra: Vec::new(), + } + } } impl IndexerRuler { #[must_use] pub fn new(rules: Vec) -> Self { Self { - rules: Arc::new(RwLock::new(rules)), + base: Arc::new(rules), + extra: Vec::new(), } } - pub async fn serialize(&self) -> Result, encode::Error> { - rmp_serde::to_vec_named(&*self.rules.read().await) - } - - pub fn deserialize(data: &[u8]) -> Result { - rmp_serde::from_slice(data).map(Self::new) - } - pub async fn apply_all( &self, source: impl AsRef + Send, metadata: &impl MetadataForIndexerRules, ) -> Result>, IndexerRuleError> { async fn inner( - rules: &[IndexerRule], + base: &[IndexerRule], + extra: &[IndexerRule], source: &Path, metadata: &impl MetadataForIndexerRules, ) -> Result>, IndexerRuleError> { - rules - .iter() + base.iter() + .chain(extra.iter()) .map(|rule| rule.apply_with_metadata(source, metadata)) .collect::>() .try_join() @@ -462,19 +467,20 @@ impl IndexerRuler { }) } - inner(&self.rules.read().await, source.as_ref(), metadata).await + inner(&self.base, &self.extra, source.as_ref(), metadata).await } /// Extend the indexer rules with the contents from an iterator of rules - pub async fn extend(&self, iter: impl IntoIterator + Send) { - let mut indexer = self.rules.write().await; - indexer.extend(iter); + pub fn extend(&mut self, iter: impl IntoIterator + Send) { + self.extra.extend(iter); } - pub async fn has_system(&self, rule: &SystemIndexerRule) -> bool { - let rules = self.rules.read().await; - - rules.iter().any(|inner_rule| rule == inner_rule) + #[must_use] + pub fn has_system(&self, rule: &SystemIndexerRule) -> bool { + self.base + .iter() + .chain(self.extra.iter()) + .any(|inner_rule| rule == inner_rule) } } diff --git a/core/crates/indexer-rules/src/seed.rs b/core/crates/indexer-rules/src/seed.rs index 960e344dbd06..74665f2fc56e 100644 --- a/core/crates/indexer-rules/src/seed.rs +++ b/core/crates/indexer-rules/src/seed.rs @@ -1,16 +1,16 @@ +use sd_prisma::prisma::{indexer_rule, PrismaClient}; + use std::path::{Path, PathBuf}; +use chrono::Utc; use futures_concurrency::future::Join; use gix_ignore::{glob::search::pattern::List, search::Ignore, Search}; -use sd_prisma::prisma::{indexer_rule, PrismaClient}; - -use chrono::Utc; +use once_cell::sync::Lazy; use thiserror::Error; use tokio::fs; use uuid::Uuid; use super::{IndexerRule, IndexerRuleError, RulePerKind}; -use once_cell::sync::Lazy; #[derive(Error, Debug)] pub enum SeederError { @@ -19,7 +19,7 @@ pub enum SeederError { #[error("An error occurred with the database while applying migrations: {0}")] DatabaseError(#[from] prisma_client_rust::QueryError), #[error("Failed to parse indexer rules based on external system")] - InhirentedExternalRules, + InheritedExternalRules, } #[derive(Debug)] @@ -29,7 +29,7 @@ pub struct GitIgnoreRules { impl GitIgnoreRules { pub async fn get_rules_if_in_git_repo( - library_root: &Path, + location_root: &Path, current: &Path, ) -> Option> { let mut git_repo = None; @@ -38,7 +38,7 @@ impl GitIgnoreRules { for ancestor in current .ancestors() - .take_while(|&path| 
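The reworked `IndexerRuler` shares its base rules behind an `Arc` but deliberately drops the per-walk `extra` rules when cloned. A stripped-down model of that behaviour, with `String` standing in for `IndexerRule` purely for illustration:

use std::sync::Arc;

// Simplified stand-in for IndexerRuler: `String` replaces IndexerRule here.
#[derive(Debug)]
struct Ruler {
    base: Arc<Vec<String>>,
    extra: Vec<String>,
}

impl Clone for Ruler {
    fn clone(&self) -> Self {
        Self {
            base: Arc::clone(&self.base),
            // Extra rules are deliberately not carried over to clones.
            extra: Vec::new(),
        }
    }
}

fn main() {
    let mut ruler = Ruler {
        base: Arc::new(vec!["no-hidden".into()]),
        extra: Vec::new(),
    };
    ruler.extra.push("gitignore: target/".into());

    let clone = ruler.clone();
    assert_eq!(clone.base.len(), 1); // the base rules are shared
    assert!(clone.extra.is_empty()); // per-walk extra rules stay with the original
}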
path.starts_with(library_root)) + .take_while(|&path| path.starts_with(location_root)) { let git_ignore = ancestor.join(".gitignore"); @@ -54,13 +54,16 @@ impl GitIgnoreRules { } let git_repo = git_repo?; - Some(Self::parse_gitrepo(git_repo, ignores).await) + Some(Self::parse_git_repo(git_repo, ignores).await) } - async fn parse_gitrepo(git_repo: &Path, gitignores: Vec) -> Result { + async fn parse_git_repo( + git_repo: &Path, + git_ignores: Vec, + ) -> Result { let mut search = Search::default(); - let gitignores = gitignores + let git_ignores = git_ignores .into_iter() .map(Self::parse_git_ignore) .collect::>() @@ -68,7 +71,7 @@ impl GitIgnoreRules { .await; search .patterns - .extend(gitignores.into_iter().filter_map(Result::ok)); + .extend(git_ignores.into_iter().filter_map(Result::ok)); let git_exclude_rules = Self::parse_git_exclude(git_repo.join(".git")).await; if let Ok(rules) = git_exclude_rules { @@ -86,11 +89,11 @@ impl GitIgnoreRules { if let Ok(Some(patterns)) = List::from_file(gitignore, None, true, &mut buf) { Ok(patterns) } else { - Err(SeederError::InhirentedExternalRules) + Err(SeederError::InheritedExternalRules) } }) .await - .map_err(|_| SeederError::InhirentedExternalRules)? + .map_err(|_| SeederError::InheritedExternalRules)? } async fn parse_git_exclude(dot_git: PathBuf) -> Result>, SeederError> { @@ -98,10 +101,10 @@ impl GitIgnoreRules { let mut buf = Vec::new(); Search::from_git_dir(dot_git.as_ref(), None, &mut buf) .map(|search| search.patterns) - .map_err(|_| SeederError::InhirentedExternalRules) + .map_err(|_| SeederError::InheritedExternalRules) }) .await - .map_err(|_| SeederError::InhirentedExternalRules)? + .map_err(|_| SeederError::InheritedExternalRules)? } async fn is_git_repo(path: &Path) -> bool { diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 41dbeb45a81f..68028fc0e791 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -303,8 +303,14 @@ macro_rules! check_interruption { let interrupter: &Interrupter = $interrupter; match interrupter.try_check_interrupt() { - Some($crate::InterruptionKind::Cancel) => return Ok($crate::ExecStatus::Canceled), - Some($crate::InterruptionKind::Pause) => return Ok($crate::ExecStatus::Paused), + Some($crate::InterruptionKind::Cancel) => { + ::tracing::trace!("Task was canceled by the user"); + return Ok($crate::ExecStatus::Canceled); + } + Some($crate::InterruptionKind::Pause) => { + ::tracing::trace!("Task was paused by the user or suspended by the task system"); + return Ok($crate::ExecStatus::Paused); + } None => { /* Everything is Awesome! */ } } }; @@ -317,11 +323,13 @@ macro_rules! 
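`get_rules_if_in_git_repo` walks the ancestors of the current directory up to (but not past) the location root, looking for `.gitignore` files and a `.git` directory. A std-only sketch of that walk; the paths in `main` are synthetic and the real pattern parsing via `gix_ignore` is omitted:

use std::path::{Path, PathBuf};

// Walk from `current` up to the location root, collecting the `.gitignore`
// candidates and noting the first ancestor that looks like a repo root.
fn gitignore_candidates(location_root: &Path, current: &Path) -> (Vec<PathBuf>, Option<PathBuf>) {
    let mut candidates = Vec::new();
    let mut git_repo = None;

    for ancestor in current
        .ancestors()
        .take_while(|path| path.starts_with(location_root))
    {
        candidates.push(ancestor.join(".gitignore"));
        if git_repo.is_none() && ancestor.join(".git").is_dir() {
            git_repo = Some(ancestor.to_path_buf());
        }
    }

    (candidates, git_repo)
}

fn main() {
    let (candidates, repo) = gitignore_candidates(
        Path::new("/library/projects"),
        Path::new("/library/projects/spacedrive/core/src"),
    );
    println!("{} .gitignore candidates, repo root: {repo:?}", candidates.len());
}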
check_interruption { match interrupter.try_check_interrupt() { Some($crate::InterruptionKind::Cancel) => { *duration_accumulator += instant.elapsed(); + ::tracing::trace!("Task was canceled by the user"); return Ok($crate::ExecStatus::Canceled); } Some($crate::InterruptionKind::Pause) => { *duration_accumulator += instant.elapsed(); + ::tracing::trace!("Task was paused by the user or suspended by the task system"); return Ok($crate::ExecStatus::Paused); } From 15863b620b03330adff871f39bfa9a4f3ff4b59e Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Fri, 24 May 2024 02:48:02 -0300 Subject: [PATCH 19/33] WIP at fixing file identifier --- Cargo.lock | 2 + .../src/file_identifier/cas_id.rs | 10 + .../heavy-lifting/src/file_identifier/job.rs | 2 +- .../tasks/extract_file_metadata.rs | 345 +++++++++++++----- .../src/file_identifier/tasks/mod.rs | 146 +++++++- .../media_processor/helpers/thumbnailer.rs | 3 + core/crates/indexer-rules/src/lib.rs | 2 +- core/crates/indexer-rules/src/seed.rs | 2 +- core/crates/prisma-helpers/Cargo.toml | 4 +- core/crates/prisma-helpers/src/lib.rs | 180 +++++++++ core/crates/sync/src/manager.rs | 10 +- crates/utils/src/lib.rs | 2 +- 12 files changed, 609 insertions(+), 99 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43b119732530..f1b04cb950dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9263,7 +9263,9 @@ version = "0.1.0" dependencies = [ "prisma-client-rust", "sd-prisma", + "sd-utils", "serde", + "uuid", ] [[package]] diff --git a/core/crates/heavy-lifting/src/file_identifier/cas_id.rs b/core/crates/heavy-lifting/src/file_identifier/cas_id.rs index 5ad5a9456690..4321546e721e 100644 --- a/core/crates/heavy-lifting/src/file_identifier/cas_id.rs +++ b/core/crates/heavy-lifting/src/file_identifier/cas_id.rs @@ -6,6 +6,7 @@ use tokio::{ fs::{self, File}, io::{self, AsyncReadExt, AsyncSeekExt, SeekFrom}, }; +use tracing::{instrument, trace, Level}; const SAMPLE_COUNT: u64 = 4; const SAMPLE_SIZE: u64 = 1024 * 10; @@ -20,6 +21,12 @@ const_assert!((HEADER_OR_FOOTER_SIZE * 2 + SAMPLE_COUNT * SAMPLE_SIZE) < MINIMUM // Asserting that the sample size is larger than header/footer size, as the same buffer is used for both const_assert!(SAMPLE_SIZE > HEADER_OR_FOOTER_SIZE); +#[instrument( + skip(path), + ret(level = Level::TRACE), + err, + fields(path = %path.as_ref().display() +))] // SAFETY: Casts here are safe, they're hardcoded values we have some const assertions above to make sure they're correct #[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_possible_wrap)] @@ -31,9 +38,12 @@ pub async fn generate_cas_id( hasher.update(&size.to_le_bytes()); if size <= MINIMUM_FILE_SIZE { + trace!("File is small, hashing the whole file"); // For small files, we hash the whole file hasher.update(&fs::read(path).await?); } else { + trace!("File bigger than threshold, hashing samples"); + let mut file = File::open(path).await?; let mut buf = vec![0; SAMPLE_SIZE as usize].into_boxed_slice(); diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index eab71967fa55..5abec957491a 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -89,7 +89,7 @@ impl Job for FileIdentifier { TaskKind::ExtractFileMetadata => { >::deserialize( &task_bytes, - (), + (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), ) .await .map(IntoTask::into_task) diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs 
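`generate_cas_id` hashes the file size plus either the whole file (small files) or a header, a handful of samples, and a footer (large files). A synchronous sketch of that scheme, assuming a blake3 hasher; `HEADER_OR_FOOTER_SIZE`, `MINIMUM_FILE_SIZE`, and the evenly spaced sample placement are illustrative guesses, since only `SAMPLE_COUNT` and `SAMPLE_SIZE` appear above:

use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::Path;

const SAMPLE_COUNT: u64 = 4;
const SAMPLE_SIZE: u64 = 1024 * 10;
const HEADER_OR_FOOTER_SIZE: u64 = 1024 * 8; // assumed value
const MINIMUM_FILE_SIZE: u64 = 1024 * 1024; // assumed "hash the whole file" threshold

fn sampled_cas_id(path: impl AsRef<Path>, size: u64) -> io::Result<String> {
    let mut hasher = blake3::Hasher::new();
    hasher.update(&size.to_le_bytes());

    if size <= MINIMUM_FILE_SIZE {
        // Small file: hash everything.
        hasher.update(&std::fs::read(path)?);
    } else {
        let mut file = File::open(path)?;
        let mut buf = vec![0u8; SAMPLE_SIZE as usize];

        // Header.
        file.read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize])?;
        hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]);

        // Evenly spaced samples across the file body (placement is illustrative).
        let step = (size - HEADER_OR_FOOTER_SIZE * 2) / SAMPLE_COUNT;
        for i in 0..SAMPLE_COUNT {
            file.seek(SeekFrom::Start(HEADER_OR_FOOTER_SIZE + i * step))?;
            file.read_exact(&mut buf)?;
            hasher.update(&buf);
        }

        // Footer.
        file.seek(SeekFrom::End(-(HEADER_OR_FOOTER_SIZE as i64)))?;
        file.read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize])?;
        hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]);
    }

    Ok(hasher.finalize().to_hex().to_string())
}

fn main() -> io::Result<()> {
    let path = std::env::args().nth(1).expect("usage: cas_id <file>");
    let size = std::fs::metadata(&path)?.len();
    println!("{}", sampled_cas_id(&path, size)?);
    Ok(())
}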
b/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs index 3bf36cc21ac3..d6560ed0fad8 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs @@ -4,13 +4,21 @@ use crate::{ }; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_file_identifier; +use sd_core_prisma_helpers::{ + file_path_for_file_identifier, file_path_to_create_object, CasId, FilePathPubId, +}; +use sd_core_sync::Manager as SyncManager; -use sd_prisma::prisma::location; +use sd_file_ext::kind::ObjectKind; +use sd_prisma::{ + prisma::{file_path, location, PrismaClient}, + prisma_sync, +}; +use sd_sync::OperationFactory; use sd_task_system::{ ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId, }; -use sd_utils::error::FileIOError; +use sd_utils::{error::FileIOError, msgpack}; use std::{ collections::HashMap, future::IntoFuture, mem, path::PathBuf, pin::pin, sync::Arc, @@ -18,63 +26,38 @@ use std::{ }; use futures::stream::{self, FuturesUnordered, StreamExt}; -use futures_concurrency::stream::Merge; +use futures_concurrency::{future::TryJoin, stream::Merge}; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::{error, trace}; -use uuid::Uuid; +use tracing::{error, instrument, trace, Level}; -use super::IdentifiedFile; +use super::{create_objects, IdentifiedFile, ObjectToCreateOrLink}; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug)] pub struct ExtractFileMetadataTask { id: TaskId, location: Arc, location_path: Arc, - file_paths_by_id: HashMap, - identified_files: HashMap, - extract_metadata_time: Duration, - errors: Vec, + file_paths_by_id: HashMap, + identified_files: HashMap, + file_paths_without_cas_id: Vec<(file_path_to_create_object::Data, ObjectKind)>, with_priority: bool, + + output: Output, + + db: Arc, + sync: Arc, } -#[derive(Debug)] +#[derive(Debug, Default, Serialize, Deserialize)] pub struct Output { - pub identified_files: HashMap, + pub file_path_pub_ids_and_kinds_by_cas_id: HashMap>, pub extract_metadata_time: Duration, + pub save_db_time: Duration, + pub created_objects_count: u64, pub errors: Vec, } -impl ExtractFileMetadataTask { - #[must_use] - pub fn new( - location: Arc, - location_path: Arc, - file_paths: Vec, - with_priority: bool, - ) -> Self { - Self { - id: TaskId::new_v4(), - location, - location_path, - identified_files: HashMap::with_capacity(file_paths.len()), - file_paths_by_id: file_paths - .into_iter() - .map(|file_path| { - // SAFETY: This should never happen - ( - Uuid::from_slice(&file_path.pub_id).expect("file_path.pub_id is invalid!"), - file_path, - ) - }) - .collect(), - extract_metadata_time: Duration::ZERO, - errors: Vec::new(), - with_priority, - } - } -} - #[async_trait::async_trait] impl Task for ExtractFileMetadataTask { fn id(&self) -> TaskId { @@ -85,38 +68,49 @@ impl Task for ExtractFileMetadataTask { self.with_priority } + #[instrument( + skip(self, interrupter), + fields( + task_id = %self.id, + location_id = %self.location.id, + location_path = %self.location_path.display(), + files_count = %self.file_paths_by_id.len(), + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, interrupter: &Interrupter) -> Result { // `Processed` is larger than `Interrupt`, but it's much more common // so we ignore the size difference to 
optimize for usage #[allow(clippy::large_enum_variant)] enum StreamMessage { - Processed(Uuid, Result), + Processed(FilePathPubId, Result), Interrupt(InterruptionKind), } let Self { - id, location, location_path, file_paths_by_id, + file_paths_without_cas_id, identified_files, - extract_metadata_time, - errors, + output, .. } = self; - let start_time = Instant::now(); - if !file_paths_by_id.is_empty() { + let start_time = Instant::now(); + let extraction_futures = file_paths_by_id .iter() .filter_map(|(file_path_id, file_path)| { try_iso_file_path_extraction( location.id, - *file_path_id, + file_path_id.clone(), file_path, Arc::clone(location_path), - errors, + &mut output.errors, ) }) .map(|(file_path_id, iso_file_path, location_path)| async move { @@ -140,39 +134,59 @@ impl Task for ExtractFileMetadataTask { .remove(&file_path_pub_id) .expect("file_path must be here"); - trace!("Processed file , {} files remaining", file_paths_by_id.len()); + trace!( + files_remaining = file_paths_by_id.len(), + %file_path_pub_id, + "Processed file", + ); match res { - Ok(FileMetadata { cas_id, kind, .. }) => { + Ok(FileMetadata { + cas_id: Some(cas_id), + kind, + .. + }) => { identified_files.insert( file_path_pub_id, - IdentifiedFile { - file_path, - cas_id, - kind, - }, + IdentifiedFile::new(file_path, cas_id, kind), ); } + Ok(FileMetadata { + cas_id: None, kind, .. + }) => { + let file_path_for_file_identifier::Data { + pub_id, + date_created, + .. + } = file_path; + file_paths_without_cas_id.push(( + file_path_to_create_object::Data { + pub_id, + date_created, + }, + kind, + )); + } Err(e) => { handle_non_critical_errors( - location.id, file_path_pub_id, &e, - errors, + &mut output.errors, ); } } if file_paths_by_id.is_empty() { - // All files have been processed so we can end this merged stream and don't keep waiting an - // interrupt signal + trace!("All files have been processed"); + // All files have been processed so we can end this merged stream + // and don't keep waiting an interrupt signal break; } } StreamMessage::Interrupt(kind) => { - trace!("Task received interrupt {kind:?}: "); - *extract_metadata_time += start_time.elapsed(); + trace!(?kind, "Interrupted"); + output.extract_metadata_time += start_time.elapsed(); return Ok(match kind { InterruptionKind::Pause => ExecStatus::Paused, InterruptionKind::Cancel => ExecStatus::Canceled, @@ -180,27 +194,125 @@ impl Task for ExtractFileMetadataTask { } } } + + output.extract_metadata_time = start_time.elapsed(); + + trace!( + identified_files_count = identified_files.len(), + "All files have been processed, saving cas_ids to db..." 
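The identifier task drives its loop by merging two streams, extraction results and interrupt signals, and reacting to whichever message arrives first. A minimal reproduction of that shape with `futures_concurrency`'s `Merge` and throwaway message types:

use std::pin::pin;

use futures::stream::{self, StreamExt};
use futures_concurrency::stream::Merge;

enum StreamMessage {
    Processed(u32),
    Interrupt,
}

#[tokio::main]
async fn main() {
    // Stand-ins: extraction results on one stream, interrupt signals on another.
    let processed = stream::iter([1, 2, 3]).map(StreamMessage::Processed);
    let interrupts = stream::iter([StreamMessage::Interrupt]);

    // Merge both sources into a single stream and match on whatever arrives,
    // the same shape the task uses for results vs. the interrupter.
    let mut msg_stream = pin!((processed, interrupts).merge());

    while let Some(msg) = msg_stream.next().await {
        match msg {
            StreamMessage::Processed(id) => println!("processed file {id}"),
            StreamMessage::Interrupt => {
                println!("interrupted, pausing");
                break;
            }
        }
    }
}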
+ ); + let start_time = Instant::now(); + // Assign cas_id to each file path + let (_, created) = ( + assign_cas_id_to_file_paths(identified_files, &self.db, &self.sync), + create_objects(file_paths_without_cas_id.iter(), &self.db, &self.sync), + ) + .try_join() + .await?; + + output.save_db_time = start_time.elapsed(); + output.created_objects_count = created; + output.file_path_pub_ids_and_kinds_by_cas_id = identified_files.drain().fold( + HashMap::new(), + |mut map, + ( + file_path_pub_id, + IdentifiedFile { + cas_id, + kind, + file_path, + }, + )| { + map.entry(cas_id) + .or_insert_with(|| Vec::with_capacity(1)) + .push(ObjectToCreateOrLink { + file_path_pub_id, + kind, + created_at: file_path.date_created, + }); + + map + }, + ); + + trace!(save_db_time = ?output.save_db_time, "Cas_ids saved to db"); } - Ok(ExecStatus::Done( - Output { - identified_files: mem::take(identified_files), - extract_metadata_time: *extract_metadata_time + start_time.elapsed(), - errors: mem::take(errors), - } - .into_output(), - )) + Ok(ExecStatus::Done(mem::take(output).into_output())) + } +} + +impl ExtractFileMetadataTask { + #[must_use] + pub fn new( + location: Arc, + location_path: Arc, + file_paths: Vec, + with_priority: bool, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + location, + location_path, + identified_files: HashMap::with_capacity(file_paths.len()), + file_paths_without_cas_id: Vec::with_capacity(file_paths.len()), + file_paths_by_id: file_paths + .into_iter() + .map(|file_path| (file_path.pub_id.as_slice().into(), file_path)) + .collect(), + output: Output::default(), + with_priority, + db, + sync, + } } } +#[instrument(skip_all, err, fields(identified_files_count = identified_files.len()))] +async fn assign_cas_id_to_file_paths( + identified_files: &HashMap, + db: &PrismaClient, + sync: &SyncManager, +) -> Result<(), file_identifier::Error> { + // Assign cas_id to each file path + sync.write_ops( + db, + identified_files + .iter() + .map(|(pub_id, IdentifiedFile { cas_id, .. 
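After extraction, file paths are grouped under the cas_id they hashed to so that duplicates can later share a single object. The same fold, reduced to plain tuples:

use std::collections::HashMap;

fn main() {
    // (cas_id, file_path id) pairs, as produced by the identifier task.
    let identified = [("cas-aaa", 1), ("cas-bbb", 2), ("cas-aaa", 3)];

    // Same shape as `file_path_pub_ids_and_kinds_by_cas_id` above: every file
    // path is grouped under its cas_id, so duplicates end up in one bucket.
    let by_cas_id = identified.into_iter().fold(
        HashMap::<&str, Vec<i32>>::new(),
        |mut map, (cas_id, file_path_id)| {
            map.entry(cas_id).or_default().push(file_path_id);
            map
        },
    );

    assert_eq!(by_cas_id["cas-aaa"], vec![1, 3]);
}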
})| { + ( + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: pub_id.to_db(), + }, + file_path::cas_id::NAME, + msgpack!(cas_id), + ), + db.file_path() + .update( + file_path::pub_id::equals(pub_id.to_db()), + vec![file_path::cas_id::set(cas_id.into())], + ) + // We don't need any data here, just the id avoids receiving the entire object + // as we can't pass an empty select macro call + .select(file_path::select!({ id })), + ) + }) + .unzip::<_, _, _, Vec<_>>(), + ) + .await?; + + Ok(()) +} + +#[instrument(skip(errors))] fn handle_non_critical_errors( - location_id: location::id::Type, - file_path_pub_id: Uuid, + file_path_pub_id: FilePathPubId, e: &FileIOError, errors: &mut Vec, ) { - error!("Failed to extract file metadata : {e:#?}"); - let formatted_error = format!(""); #[cfg(target_os = "windows")] @@ -234,18 +346,26 @@ fn handle_non_critical_errors( } } +#[instrument( + skip(location_id, file_path, location_path, errors), + fields( + file_path_id = file_path.id, + materialized_path = ?file_path.materialized_path, + name = ?file_path.name, + extension = ?file_path.extension, + ) +)] fn try_iso_file_path_extraction( location_id: location::id::Type, - file_path_pub_id: Uuid, + file_path_pub_id: FilePathPubId, file_path: &file_path_for_file_identifier::Data, location_path: Arc, errors: &mut Vec, -) -> Option<(Uuid, IsolatedFilePathData<'static>, Arc)> { +) -> Option<(FilePathPubId, IsolatedFilePathData<'static>, Arc)> { IsolatedFilePathData::try_from((location_id, file_path)) .map(IsolatedFilePathData::to_owned) - .map(|iso_file_path| (file_path_pub_id, iso_file_path, location_path)) .map_err(|e| { - error!("Failed to extract isolated file path data: {e:#?}"); + error!(?e, "Failed to extract isolated file path data"); errors.push( file_identifier::NonCriticalFileIdentifierError::FailedToExtractIsolatedFilePathData(format!( "" @@ -253,24 +373,79 @@ fn try_iso_file_path_extraction( .into(), ); }) + .map(|iso_file_path| (file_path_pub_id, iso_file_path, location_path)) .ok() } +#[derive(Serialize, Deserialize)] +struct SaveState { + id: TaskId, + location: Arc, + location_path: Arc, + file_paths_by_id: HashMap, + identified_files: HashMap, + file_paths_without_cas_id: Vec<(file_path_to_create_object::Data, ObjectKind)>, + output: Output, + with_priority: bool, +} + impl SerializableTask for ExtractFileMetadataTask { type SerializeError = rmp_serde::encode::Error; type DeserializeError = rmp_serde::decode::Error; - type DeserializeCtx = (); + type DeserializeCtx = (Arc, Arc); async fn serialize(self) -> Result, Self::SerializeError> { - rmp_serde::to_vec_named(&self) + let Self { + id, + location, + location_path, + file_paths_by_id, + identified_files, + file_paths_without_cas_id, + output, + with_priority, + .. 
+ } = self; + rmp_serde::to_vec_named(&SaveState { + id, + location, + location_path, + file_paths_by_id, + identified_files, + file_paths_without_cas_id, + output, + with_priority, + }) } async fn deserialize( data: &[u8], - (): Self::DeserializeCtx, + (db, sync): Self::DeserializeCtx, ) -> Result { - rmp_serde::from_slice(data) + rmp_serde::from_slice::(data).map( + |SaveState { + id, + location, + location_path, + file_paths_by_id, + identified_files, + file_paths_without_cas_id, + output, + with_priority, + }| Self { + id, + location, + location_path, + file_paths_by_id, + identified_files, + file_paths_without_cas_id, + output, + with_priority, + db, + sync, + }, + ) } } diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs index c06fc8ad03fa..c1f6db1e668f 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs @@ -1,8 +1,21 @@ -use sd_core_prisma_helpers::file_path_for_file_identifier; +use crate::file_identifier; + +use chrono::{DateTime, FixedOffset}; +use sd_core_prisma_helpers::{ + file_path_for_file_identifier, file_path_to_create_object, CasId, FilePathPubId, ObjectPubId, +}; +use sd_core_sync::Manager as SyncManager; use sd_file_ext::kind::ObjectKind; +use sd_prisma::{ + prisma::{file_path, object, PrismaClient}, + prisma_sync, +}; +use sd_sync::OperationFactory; +use sd_utils::msgpack; use serde::{Deserialize, Serialize}; +use tracing::{instrument, trace, Level}; pub mod extract_file_metadata; pub mod object_processor; @@ -12,7 +25,132 @@ pub use object_processor::ObjectProcessorTask; #[derive(Debug, Serialize, Deserialize)] pub(super) struct IdentifiedFile { - pub(super) file_path: file_path_for_file_identifier::Data, - pub(super) cas_id: Option, - pub(super) kind: ObjectKind, + file_path: file_path_for_file_identifier::Data, + cas_id: CasId, + kind: ObjectKind, +} + +impl IdentifiedFile { + pub fn new( + file_path: file_path_for_file_identifier::Data, + cas_id: impl Into, + kind: ObjectKind, + ) -> Self { + Self { + file_path, + cas_id: cas_id.into(), + kind, + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct ObjectToCreateOrLink { + file_path_pub_id: FilePathPubId, + kind: ObjectKind, + created_at: Option>, +} + +#[instrument(skip_all, ret(level = Level::TRACE), err)] +async fn create_objects( + files_and_kinds: impl IntoIterator, + db: &PrismaClient, + sync: &SyncManager, +) -> Result { + trace!("Creating new Objects!"); + + let (object_create_args, file_path_update_args) = files_and_kinds + .into_iter() + .map( + |( + file_path_to_create_object::Data { + pub_id: file_path_pub_id, + date_created, + }, + kind, + )| { + let object_pub_id = ObjectPubId::new(); + + let kind = *kind as i32; + + let (sync_params, db_params) = [ + ( + (object::date_created::NAME, msgpack!(date_created)), + object::date_created::set(*date_created), + ), + ( + (object::kind::NAME, msgpack!(kind)), + object::kind::set(Some(kind)), + ), + ] + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(); + + ( + ( + sync.shared_create( + prisma_sync::object::SyncId { + pub_id: object_pub_id.to_db(), + }, + sync_params, + ), + object::create_unchecked(object_pub_id.to_db(), db_params), + ), + ( + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: file_path_pub_id.clone(), + }, + file_path::object::NAME, + msgpack!(prisma_sync::object::SyncId { + pub_id: object_pub_id.to_db() + }), + ), + db.file_path() + .update( + 
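`create_objects` builds paired sync operations and database queries in one pass and then splits them with `Iterator::unzip` into two parallel `Vec`s, one handed to the sync manager and one to Prisma. The same pattern with plain strings standing in for the CRDT ops and queries:

fn main() {
    let items = ["a", "b", "c"];

    let (sync_ops, db_params): (Vec<_>, Vec<_>) = items
        .iter()
        .map(|name| {
            (
                format!("sync: create object {name}"), // stand-in for a CRDT op
                format!("db: insert object {name}"),   // stand-in for a Prisma query
            )
        })
        .unzip();

    // Both sides stay aligned, so they can be submitted together in one write_ops call.
    assert_eq!(sync_ops.len(), db_params.len());
}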
file_path::pub_id::equals(file_path_pub_id.clone()), + vec![file_path::object::connect(object::pub_id::equals( + object_pub_id.into(), + ))], + ) + // selecting just id to avoid fetching the whole object + .select(file_path::select!({ id })), + ), + ) + }, + ) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + // create new object records with assembled values + let total_created_files = sync + .write_ops(db, { + let (sync, db_params) = object_create_args + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(); + + ( + sync.into_iter().flatten().collect(), + db.object().create_many(db_params), + ) + }) + .await?; + + trace!(%total_created_files, "Created new Objects"); + + if total_created_files > 0 { + trace!("Updating file paths with created objects"); + + sync.write_ops( + db, + file_path_update_args + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(), + ) + .await?; + + trace!("Updated file paths with created objects"); + } + + #[allow(clippy::cast_sign_loss)] // SAFETY: We're sure the value is positive + Ok(total_created_files as u64) } diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs index 6dcb1a6ba5cf..c0763cd02fd6 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs @@ -264,11 +264,13 @@ pub async fn generate_thumbnail( if let Ok(extension) = ImageExtension::from_str(extension) { if can_generate_thumbnail_for_image(extension) { + trace!("Generating image thumbnail for {}", path.display()); if let Err(e) = generate_image_thumbnail(&path, &output_path).await { return (start.elapsed(), Err(e)); } } } else if let Ok(extension) = DocumentExtension::from_str(extension) { + trace!("Generating document thumbnail for {}", path.display()); if can_generate_thumbnail_for_document(extension) { if let Err(e) = generate_image_thumbnail(&path, &output_path).await { return (start.elapsed(), Err(e)); @@ -282,6 +284,7 @@ pub async fn generate_thumbnail( use sd_file_ext::extensions::VideoExtension; if let Ok(extension) = VideoExtension::from_str(extension) { + trace!("Generating image thumbnail for {}", path.display()); if can_generate_thumbnail_for_video(extension) { if let Err(e) = generate_video_thumbnail(&path, &output_path).await { return (start.elapsed(), Err(e)); diff --git a/core/crates/indexer-rules/src/lib.rs b/core/crates/indexer-rules/src/lib.rs index 2b934104e585..4f4b6c9fb787 100644 --- a/core/crates/indexer-rules/src/lib.rs +++ b/core/crates/indexer-rules/src/lib.rs @@ -167,7 +167,7 @@ impl IndexerRuleCreateArgs { Ok(Some( db.indexer_rule() .create( - sd_utils::uuid_to_bytes(generate_pub_id()), + sd_utils::uuid_to_bytes(&generate_pub_id()), vec![ name::set(Some(self.name)), rules_per_kind::set(Some(rules_data)), diff --git a/core/crates/indexer-rules/src/seed.rs b/core/crates/indexer-rules/src/seed.rs index 74665f2fc56e..e0bed1bd9b0f 100644 --- a/core/crates/indexer-rules/src/seed.rs +++ b/core/crates/indexer-rules/src/seed.rs @@ -182,7 +182,7 @@ pub async fn new_or_existing_library(db: &PrismaClient) -> Result<(), SeederErro .into_iter() .enumerate() { - let pub_id = sd_utils::uuid_to_bytes(Uuid::from_u128(i as u128)); + let pub_id = sd_utils::uuid_to_bytes(&Uuid::from_u128(i as u128)); let rules = rmp_serde::to_vec_named(&rule.rules).map_err(IndexerRuleError::from)?; let data = vec![ diff --git a/core/crates/prisma-helpers/Cargo.toml b/core/crates/prisma-helpers/Cargo.toml index 
6271e754b1f3..2d28f7765fd8 100644 --- a/core/crates/prisma-helpers/Cargo.toml +++ b/core/crates/prisma-helpers/Cargo.toml @@ -11,6 +11,8 @@ edition = { workspace = true } [dependencies] # Spacedrive Sub-crates sd-prisma = { path = "../../../crates/prisma" } +sd-utils = { path = "../../../crates/utils" } prisma-client-rust = { workspace = true } -serde = { workspace = true } +serde = { workspace = true, features = ["derive"] } +uuid = { workspace = true, features = ["v4", "serde"] } diff --git a/core/crates/prisma-helpers/src/lib.rs b/core/crates/prisma-helpers/src/lib.rs index c7c214417e01..ad8c4154914b 100644 --- a/core/crates/prisma-helpers/src/lib.rs +++ b/core/crates/prisma-helpers/src/lib.rs @@ -29,6 +29,12 @@ #![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] use sd_prisma::prisma::{file_path, job, label, location, object}; +use sd_utils::{from_bytes_to_uuid, uuid_to_bytes}; + +use std::fmt; + +use serde::{Deserialize, Serialize}; +use uuid::Uuid; // File Path selectables! file_path::select!(file_path_pub_id { pub_id }); @@ -139,6 +145,10 @@ file_path::select!(file_path_to_full_path { path } }); +file_path::select!(file_path_to_create_object { + pub_id + date_created +}); // File Path includes! file_path::include!(file_path_with_object { object }); @@ -286,3 +296,173 @@ label::include!((take: i64) => label_with_objects { } } }); + +#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)] +#[serde(transparent)] +pub struct CasId(String); + +impl From for file_path::cas_id::Type { + fn from(cas_id: CasId) -> Self { + Some(cas_id.0) + } +} + +impl From<&CasId> for file_path::cas_id::Type { + fn from(cas_id: &CasId) -> Self { + Some(cas_id.0.clone()) + } +} + +impl From<&str> for CasId { + fn from(cas_id: &str) -> Self { + Self(cas_id.to_string()) + } +} + +impl From for CasId { + fn from(cas_id: String) -> Self { + Self(cas_id) + } +} + +#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)] +#[serde(transparent)] +#[repr(transparent)] +pub struct FilePathPubId(PubId); + +#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)] +#[serde(transparent)] +#[repr(transparent)] +pub struct ObjectPubId(PubId); + +#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)] +enum PubId { + Uuid(Uuid), + Vec(Vec), +} + +impl PubId { + fn new() -> Self { + Self::Uuid(Uuid::new_v4()) + } + + fn to_db(&self) -> Vec { + match self { + Self::Uuid(uuid) => uuid_to_bytes(uuid), + Self::Vec(bytes) => bytes.clone(), + } + } +} + +impl Default for PubId { + fn default() -> Self { + Self::new() + } +} + +impl From for PubId { + fn from(uuid: Uuid) -> Self { + Self::Uuid(uuid) + } +} + +impl From> for PubId { + fn from(bytes: Vec) -> Self { + Self::Vec(bytes) + } +} + +impl From<&[u8]> for PubId { + fn from(bytes: &[u8]) -> Self { + Self::Vec(bytes.to_vec()) + } +} + +impl From for Vec { + fn from(pub_id: PubId) -> Self { + match pub_id { + PubId::Uuid(uuid) => uuid_to_bytes(&uuid), + PubId::Vec(bytes) => bytes, + } + } +} + +impl From for Uuid { + fn from(pub_id: PubId) -> Self { + match pub_id { + PubId::Uuid(uuid) => uuid, + PubId::Vec(bytes) => from_bytes_to_uuid(&bytes), + } + } +} + +impl fmt::Display for PubId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Uuid(uuid) => write!(f, "{uuid}"), + Self::Vec(bytes) => write!(f, "{}", from_bytes_to_uuid(bytes)), + } + } +} + +macro_rules! delegate_pub_id { + ($($type_name:ty),+ $(,)?) 
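The new `PubId` wrapper keeps whichever representation it was handed, a `Uuid` or the raw database bytes, and converts lazily when the database needs bytes or the API needs a `Uuid`. A small local model of that idea using only the `uuid` crate:

use uuid::Uuid;

// Simplified local model of the PubId idea above: store whichever form arrived,
// convert only at the boundary that needs the other one.
#[derive(Debug, Clone, PartialEq)]
enum PubId {
    Uuid(Uuid),
    Vec(Vec<u8>),
}

impl PubId {
    fn to_db(&self) -> Vec<u8> {
        match self {
            Self::Uuid(uuid) => uuid.as_bytes().to_vec(),
            Self::Vec(bytes) => bytes.clone(),
        }
    }
}

fn main() {
    let original = Uuid::new_v4();

    // Coming from the API as a Uuid...
    let from_api = PubId::Uuid(original);
    // ...or from the database as raw bytes.
    let from_db = PubId::Vec(original.as_bytes().to_vec());

    assert_eq!(from_api.to_db(), from_db.to_db());
    assert_eq!(Uuid::from_slice(&from_db.to_db()).expect("16 bytes"), original);
}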
=> { + $( + impl From<::uuid::Uuid> for $type_name { + fn from(uuid: ::uuid::Uuid) -> Self { + Self(uuid.into()) + } + } + + impl From> for $type_name { + fn from(bytes: Vec) -> Self { + Self(bytes.into()) + } + } + + impl From<&[u8]> for $type_name { + fn from(bytes: &[u8]) -> Self { + Self(bytes.into()) + } + } + + impl From<$type_name> for Vec { + fn from(pub_id: $type_name) -> Self { + pub_id.0.into() + } + } + + impl From<$type_name> for ::uuid::Uuid { + fn from(pub_id: $type_name) -> Self { + pub_id.0.into() + } + } + + impl ::std::fmt::Display for $type_name { + fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { + write!(f, "{}", self.0) + } + } + + impl $type_name { + #[must_use] + pub fn new() -> Self { + Self(PubId::new()) + } + + #[must_use] + pub fn to_db(&self) -> Vec { + self.0.to_db() + } + } + + impl Default for $type_name { + fn default() -> Self { + Self::new() + } + } + )+ + }; +} + +delegate_pub_id!(FilePathPubId, ObjectPubId); diff --git a/core/crates/sync/src/manager.rs b/core/crates/sync/src/manager.rs index ff7c0217f969..843ca818c0e8 100644 --- a/core/crates/sync/src/manager.rs +++ b/core/crates/sync/src/manager.rs @@ -170,7 +170,7 @@ impl Manager { .crdt_operation() .find_many(vec![ crdt_operation::instance::is(vec![instance::pub_id::equals(uuid_to_bytes( - instance_uuid, + &instance_uuid, ))]), crdt_operation::timestamp::gt(timestamp.as_u64() as i64), ]) @@ -199,7 +199,7 @@ impl Manager { .map(|(instance_id, timestamp)| { prisma_client_rust::and![ $op::instance::is(vec![instance::pub_id::equals(uuid_to_bytes( - *instance_id + instance_id ))]), $op::timestamp::gt(timestamp.as_u64() as i64) ] @@ -211,7 +211,7 @@ impl Manager { .clocks .iter() .map(|(instance_id, _)| { - uuid_to_bytes(*instance_id) + uuid_to_bytes(instance_id) }) .collect() ) @@ -258,7 +258,7 @@ impl Manager { .map(|(instance_id, timestamp)| { prisma_client_rust::and![ $op::instance::is(vec![instance::pub_id::equals(uuid_to_bytes( - *instance_id + instance_id ))]), $op::timestamp::gt(timestamp.as_u64() as i64) ] @@ -270,7 +270,7 @@ impl Manager { .clocks .iter() .map(|(instance_id, _)| { - uuid_to_bytes(*instance_id) + uuid_to_bytes(instance_id) }) .collect() ) diff --git a/crates/utils/src/lib.rs b/crates/utils/src/lib.rs index 1a910d15da18..07a0bd20a5d7 100644 --- a/crates/utils/src/lib.rs +++ b/crates/utils/src/lib.rs @@ -68,7 +68,7 @@ pub const fn i64_to_frontend(num: i64) -> (i32, u32) { #[inline] #[must_use] -pub fn uuid_to_bytes(uuid: Uuid) -> Vec { +pub fn uuid_to_bytes(uuid: &Uuid) -> Vec { uuid.as_bytes().to_vec() } From 4db53f7e31c9d99b8b67219e15506a7775f310b2 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Sat, 25 May 2024 01:37:46 -0300 Subject: [PATCH 20/33] Fixed file identifier job - still need to work on its progress report frontend --- .../heavy-lifting/src/file_identifier/job.rs | 383 +++++++++------ .../heavy-lifting/src/file_identifier/mod.rs | 101 +++- .../src/file_identifier/shallow.rs | 79 +-- ...extract_file_metadata.rs => identifier.rs} | 93 +++- .../src/file_identifier/tasks/mod.rs | 137 +++--- .../file_identifier/tasks/object_processor.rs | 460 ++++++------------ core/crates/heavy-lifting/src/indexer/mod.rs | 44 +- .../heavy-lifting/src/indexer/tasks/saver.rs | 119 +++-- .../src/indexer/tasks/updater.rs | 119 ++--- .../src/indexer/tasks/walker/entry.rs | 14 +- .../src/indexer/tasks/walker/mod.rs | 126 ++--- .../heavy-lifting/src/job_system/runner.rs | 15 +- .../heavy-lifting/src/job_system/store.rs | 2 +- .../helpers/exif_media_data.rs | 2 +- 
.../tasks/media_data_extractor.rs | 4 +- core/crates/indexer-rules/src/lib.rs | 2 +- core/crates/prisma-helpers/src/lib.rs | 40 +- core/crates/sync/tests/lib.rs | 4 +- core/crates/sync/tests/mock_instance.rs | 4 +- core/src/api/cloud.rs | 4 +- core/src/api/tags.rs | 2 +- core/src/cloud/sync/receive.rs | 12 +- core/src/library/config.rs | 2 +- core/src/library/manager/mod.rs | 4 +- core/src/location/manager/watcher/utils.rs | 4 +- core/src/location/mod.rs | 4 +- 26 files changed, 967 insertions(+), 813 deletions(-) rename core/crates/heavy-lifting/src/file_identifier/tasks/{extract_file_metadata.rs => identifier.rs} (84%) diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 5abec957491a..0f195f3b0a0e 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -12,7 +12,7 @@ use crate::{ }; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_file_identifier; +use sd_core_prisma_helpers::{file_path_for_file_identifier, CasId}; use sd_prisma::prisma::{file_path, location, SortOrder}; use sd_task_system::{ @@ -23,6 +23,7 @@ use sd_utils::{db::maybe_missing, u64_to_frontend}; use std::{ collections::{HashMap, HashSet}, + fmt, hash::{Hash, Hasher}, mem, path::PathBuf, @@ -35,28 +36,61 @@ use futures_concurrency::future::TryJoin; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::{debug, error, trace, warn}; +use tracing::{debug, error, instrument, trace, warn}; use super::{ - orphan_path_filters_deep, orphan_path_filters_shallow, - tasks::{ - extract_file_metadata, object_processor, ExtractFileMetadataTask, ObjectProcessorTask, - }, + accumulate_file_paths_by_cas_id, dispatch_object_processor_tasks, orphan_path_filters_deep, + orphan_path_filters_shallow, + tasks::{self, identifier, object_processor, FilePathToCreateOrLinkObject}, CHUNK_SIZE, }; +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +enum Phase { + IdentifyingFiles, + ProcessingObjects, +} + +impl Default for Phase { + fn default() -> Self { + Self::IdentifyingFiles + } +} + +impl fmt::Display for Phase { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::IdentifyingFiles => write!(f, "identifying_files"), + Self::ProcessingObjects => write!(f, "processing_objects"), + } + } +} + +impl From for String { + fn from(phase: Phase) -> Self { + phase.to_string() + } +} + #[derive(Debug)] pub struct FileIdentifier { + // Received arguments location: Arc, location_path: Arc, sub_path: Option, - metadata: Metadata, + // Inner state + file_paths_accumulator: HashMap>, + file_paths_ids_with_priority: HashSet, - priority_tasks_ids: HashSet, + // Job control + phase: Phase, + // Run data + metadata: Metadata, errors: Vec, + // On shutdown data pending_tasks_on_resume: Vec>, tasks_for_shutdown: Vec>>, } @@ -86,16 +120,14 @@ impl Job for FileIdentifier { .into_iter() .map(|(task_kind, task_bytes)| async move { match task_kind { - TaskKind::ExtractFileMetadata => { - >::deserialize( - &task_bytes, - (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), - ) - .await - .map(IntoTask::into_task) - } + TaskKind::Identifier => tasks::Identifier::deserialize( + &task_bytes, + (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), + ) + .await + .map(IntoTask::into_task), - TaskKind::ObjectProcessor => ObjectProcessorTask::deserialize( + TaskKind::ObjectProcessor => tasks::ObjectProcessor::deserialize( 
&task_bytes, (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), ) @@ -126,12 +158,10 @@ impl Job for FileIdentifier { while let Some(task) = pending_running_tasks.next().await { match task { Ok(TaskStatus::Done((task_id, TaskOutput::Out(out)))) => { - if let Some(new_object_processor_task) = self - .process_task_output(task_id, out, &ctx, &dispatcher) - .await - { - pending_running_tasks.push(new_object_processor_task); - }; + pending_running_tasks.extend( + self.process_task_output(task_id, out, &ctx, &dispatcher) + .await, + ); } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { @@ -208,8 +238,10 @@ impl FileIdentifier { .map(Arc::new)?, location: Arc::new(location), sub_path, + file_paths_accumulator: HashMap::new(), + file_paths_ids_with_priority: HashSet::new(), + phase: Phase::default(), metadata: Metadata::default(), - priority_tasks_ids: HashSet::new(), errors: Vec::new(), pending_tasks_on_resume: Vec::new(), tasks_for_shutdown: Vec::new(), @@ -247,17 +279,16 @@ impl FileIdentifier { // First we dispatch some shallow priority tasks to quickly identify orphans in the location // root directory or in the desired sub-path - let file_paths_already_identifying = self - .dispatch_priority_identifier_tasks( - &mut last_orphan_file_path_id, - maybe_sub_iso_file_path - .as_ref() - .unwrap_or(&location_root_iso_file_path), - ctx, - dispatcher, - pending_running_tasks, - ) - .await?; + self.dispatch_priority_identifier_tasks( + &mut last_orphan_file_path_id, + maybe_sub_iso_file_path + .as_ref() + .unwrap_or(&location_root_iso_file_path), + ctx, + dispatcher, + pending_running_tasks, + ) + .await?; self.dispatch_deep_identifier_tasks( &mut last_orphan_file_path_id, @@ -265,15 +296,11 @@ impl FileIdentifier { ctx, dispatcher, pending_running_tasks, - &file_paths_already_identifying, ) .await?; - // Multiplying by 2 as each batch will have 2 tasks - self.metadata.total_tasks *= 2; - ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_tasks), + ProgressUpdate::TaskCount(self.metadata.total_identifier_tasks), ProgressUpdate::Message(format!( "{} files to be identified", self.metadata.total_found_orphans @@ -284,7 +311,11 @@ impl FileIdentifier { self.metadata.seeking_orphans_time = start.elapsed(); } else { ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_tasks), + ProgressUpdate::TaskCount(if matches!(self.phase, Phase::IdentifyingFiles) { + self.metadata.total_identifier_tasks + } else { + self.metadata.total_object_processor_tasks + }), ProgressUpdate::Message(format!( "{} files to be identified", self.metadata.total_found_orphans @@ -308,13 +339,13 @@ impl FileIdentifier { any_task_output: Box, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Option> { - if any_task_output.is::() { + ) -> Vec> { + if any_task_output.is::() { return self - .process_extract_file_metadata_output( + .process_identifier_output( task_id, *any_task_output - .downcast::() + .downcast::() .expect("just checked"), ctx, dispatcher, @@ -333,77 +364,104 @@ impl FileIdentifier { unreachable!("Unexpected task output type: "); } - None + vec![] } - async fn process_extract_file_metadata_output( + #[instrument( + skip_all, + fields( + %task_id, + ?extract_metadata_time, + ?save_db_time, + created_objects_count, + total_identified_files, + errors_count = errors.len() + ) + )] + async fn process_identifier_output( &mut self, task_id: TaskId, - extract_file_metadata::Output { - identified_files, + identifier::Output { + file_path_ids_with_new_object, + file_paths_by_cas_id, 
extract_metadata_time, + save_db_time, + created_objects_count, + total_identified_files, errors, - }: extract_file_metadata::Output, + }: identifier::Output, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Option> { + ) -> Vec> { self.metadata.extract_metadata_time += extract_metadata_time; + self.metadata.mean_save_db_time_on_identifier_tasks += save_db_time; + self.metadata.created_objects_count += created_objects_count; + + let file_paths_with_new_object_to_report = file_path_ids_with_new_object + .into_iter() + .filter_map(|id| self.file_paths_ids_with_priority.take(&id)) + .collect::>(); + + if !file_paths_with_new_object_to_report.is_empty() { + ctx.report_update(UpdateEvent::NewIdentifiedObjects { + file_path_ids: file_paths_with_new_object_to_report, + }); + } if !errors.is_empty() { - error!("Non critical errors while extracting metadata: {errors:#?}"); + error!(?errors, "Non critical errors while extracting metadata"); self.errors.extend(errors); } - let maybe_task = if identified_files.is_empty() { - self.metadata.completed_tasks += 2; // Adding 2 as we will not have an ObjectProcessorTask + accumulate_file_paths_by_cas_id(file_paths_by_cas_id, &mut self.file_paths_accumulator); + + self.metadata.completed_identifier_tasks += 1; + + ctx.progress(vec![ + ProgressUpdate::CompletedTaskCount(self.metadata.completed_identifier_tasks), + ProgressUpdate::Message(format!( + "Identified {total_identified_files} of {} files", + self.metadata.total_found_orphans + )), + ]) + .await; + + debug!( + "Processed ({}/{}) identifier tasks, took: {extract_metadata_time:?}", + self.metadata.completed_identifier_tasks, self.metadata.total_identifier_tasks, + ); - ctx.progress(vec![ProgressUpdate::CompletedTaskCount( - self.metadata.completed_tasks, - )]) + // If we completed all identifier tasks, then we dispatch the object processor tasks + if self.metadata.completed_identifier_tasks == self.metadata.total_identifier_tasks { + let tasks = dispatch_object_processor_tasks( + self.file_paths_accumulator.drain(), + ctx, + dispatcher, + false, + ) .await; - None - } else { - self.metadata.completed_tasks += 1; + self.metadata.total_object_processor_tasks = tasks.len() as u64; ctx.progress(vec![ - ProgressUpdate::CompletedTaskCount(self.metadata.completed_tasks), - ProgressUpdate::Message(format!("Identified {} files", identified_files.len())), + ProgressUpdate::TaskCount(self.metadata.total_object_processor_tasks), + ProgressUpdate::CompletedTaskCount(0), + ProgressUpdate::phase(self.phase), ]) .await; - let with_priority = self.priority_tasks_ids.remove(&task_id); - - let task = dispatcher - .dispatch(ObjectProcessorTask::new( - identified_files, - Arc::clone(ctx.db()), - Arc::clone(ctx.sync()), - with_priority, - )) - .await; - - if with_priority { - self.priority_tasks_ids.insert(task.task_id()); - } - - Some(task) - }; - - debug!( - "Processed {}/{} file identifier tasks, took: {extract_metadata_time:?}", - self.metadata.completed_tasks, self.metadata.total_tasks, - ); - - maybe_task + tasks + } else { + vec![] + } } + #[instrument(skip(self, file_path_ids_with_new_object, ctx))] async fn process_object_processor_output( &mut self, task_id: TaskId, object_processor::Output { file_path_ids_with_new_object, - assign_cas_ids_time, fetch_existing_objects_time, assign_to_existing_object_time, create_object_time, @@ -412,17 +470,16 @@ impl FileIdentifier { }: object_processor::Output, ctx: &impl JobContext, ) { - self.metadata.assign_cas_ids_time += assign_cas_ids_time; 
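Both output handlers filter the returned file path ids through the job's priority set with `HashSet::take`, so only ids that were dispatched as priority, and are still awaiting a report, reach the `NewIdentifiedObjects` event. That filter in isolation:

use std::collections::HashSet;

fn main() {
    // Ids the job is tracking as priority (shallow, user-visible) file paths.
    let mut priority_ids: HashSet<i32> = [1, 2, 3, 4].into_iter().collect();

    // Ids that just received a new Object from a finished task.
    let with_new_object = vec![2, 4, 9];

    // `HashSet::take` removes the id and returns it only if it was tracked,
    // which is exactly the filter used before emitting NewIdentifiedObjects.
    let to_report: Vec<i32> = with_new_object
        .into_iter()
        .filter_map(|id| priority_ids.take(&id))
        .collect();

    assert_eq!(to_report, vec![2, 4]);
    assert_eq!(priority_ids.len(), 2); // ids 1 and 3 are still pending
}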
self.metadata.fetch_existing_objects_time += fetch_existing_objects_time; self.metadata.assign_to_existing_object_time += assign_to_existing_object_time; self.metadata.create_object_time += create_object_time; self.metadata.created_objects_count += created_objects_count; self.metadata.linked_objects_count += linked_objects_count; - self.metadata.completed_tasks += 1; + self.metadata.completed_object_processor_tasks += 1; ctx.progress(vec![ - ProgressUpdate::CompletedTaskCount(self.metadata.completed_tasks), + ProgressUpdate::CompletedTaskCount(self.metadata.completed_object_processor_tasks), ProgressUpdate::Message(format!( "Processed {} of {} objects", self.metadata.created_objects_count + self.metadata.linked_objects_count, @@ -431,20 +488,22 @@ impl FileIdentifier { ]) .await; - if self.priority_tasks_ids.remove(&task_id) { + let file_paths_with_new_object_to_report = file_path_ids_with_new_object + .into_iter() + .filter_map(|id| self.file_paths_ids_with_priority.take(&id)) + .collect::>(); + + if !file_paths_with_new_object_to_report.is_empty() { ctx.report_update(UpdateEvent::NewIdentifiedObjects { - file_path_ids: file_path_ids_with_new_object, + file_path_ids: file_paths_with_new_object_to_report, }); } debug!( - "Processed {}/{} file identifier tasks, took: {:?}", - self.metadata.completed_tasks, - self.metadata.total_tasks, - assign_cas_ids_time - + fetch_existing_objects_time - + assign_to_existing_object_time - + create_object_time, + "Processed ({}/{}) object processor tasks, took: {:?}", + self.metadata.completed_object_processor_tasks, + self.metadata.total_object_processor_tasks, + fetch_existing_objects_time + assign_to_existing_object_time + create_object_time, ); } @@ -455,11 +514,9 @@ impl FileIdentifier { ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, pending_running_tasks: &FuturesUnordered>, - ) -> Result, file_identifier::Error> { + ) -> Result<(), file_identifier::Error> { let db = ctx.db(); - let mut file_paths_already_identifying = HashSet::new(); - loop { #[allow(clippy::cast_possible_wrap)] // SAFETY: we know that CHUNK_SIZE is a valid i64 @@ -482,29 +539,33 @@ impl FileIdentifier { break; } - file_paths_already_identifying.extend(orphan_paths.iter().map(|path| path.id)); + self.file_paths_ids_with_priority.extend( + orphan_paths + .iter() + .map(|file_path_for_file_identifier::Data { id, .. 
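// Illustrative sketch, not part of this patch: both output handlers above report only the
// file paths that were dispatched with priority, using `HashSet::take` so each id is
// removed from the priority set as it is reported and can never be emitted twice. The
// same pattern as a std-only helper with hypothetical names:
use std::collections::HashSet;

fn take_priority_ids(new_object_ids: Vec<i32>, priority: &mut HashSet<i32>) -> Vec<i32> {
    new_object_ids
        .into_iter()
        // `take` removes the id from the set and yields it only if it was present.
        .filter_map(|id| priority.take(&id))
        .collect()
}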
}| *id), + ); self.metadata.total_found_orphans += orphan_paths.len() as u64; *last_orphan_file_path_id = Some(orphan_paths.last().expect("orphan_paths is not empty").id); - self.metadata.total_tasks += 1; - - let priority_task = dispatcher - .dispatch(ExtractFileMetadataTask::new( - Arc::clone(&self.location), - Arc::clone(&self.location_path), - orphan_paths, - true, - )) - .await; - - self.priority_tasks_ids.insert(priority_task.task_id()); + self.metadata.total_identifier_tasks += 1; - pending_running_tasks.push(priority_task); + pending_running_tasks.push( + dispatcher + .dispatch(tasks::Identifier::new( + Arc::clone(&self.location), + Arc::clone(&self.location_path), + orphan_paths, + true, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )) + .await, + ); } - Ok(file_paths_already_identifying) + Ok(()) } async fn dispatch_deep_identifier_tasks( @@ -514,7 +575,6 @@ impl FileIdentifier { ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, pending_running_tasks: &FuturesUnordered>, - file_paths_already_identifying: &HashSet, ) -> Result<(), file_identifier::Error> { let db = ctx.db(); @@ -543,7 +603,9 @@ impl FileIdentifier { *last_orphan_file_path_id = Some(orphan_paths.last().expect("orphan_paths is not empty").id); - orphan_paths.retain(|path| !file_paths_already_identifying.contains(&path.id)); + orphan_paths.retain(|file_path_for_file_identifier::Data { id, .. }| { + !self.file_paths_ids_with_priority.contains(id) + }); // If we don't have any new orphan paths after filtering out, we can skip this iteration if orphan_paths.is_empty() { @@ -552,15 +614,17 @@ impl FileIdentifier { self.metadata.total_found_orphans += orphan_paths.len() as u64; - self.metadata.total_tasks += 1; + self.metadata.total_identifier_tasks += 1; pending_running_tasks.push( dispatcher - .dispatch(ExtractFileMetadataTask::new( + .dispatch(tasks::Identifier::new( Arc::clone(&self.location), Arc::clone(&self.location_path), orphan_paths, false, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), )) .await, ); @@ -572,7 +636,7 @@ impl FileIdentifier { #[derive(Debug, Clone, Copy, Serialize, Deserialize)] enum TaskKind { - ExtractFileMetadata, + Identifier, ObjectProcessor, } @@ -582,9 +646,11 @@ struct SaveState { location_path: Arc, sub_path: Option, - metadata: Metadata, + file_paths_accumulator: HashMap>, + file_paths_ids_with_priority: HashSet, - priority_tasks_ids: HashSet, + phase: Phase, + metadata: Metadata, errors: Vec, @@ -594,7 +660,7 @@ struct SaveState { #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct Metadata { extract_metadata_time: Duration, - assign_cas_ids_time: Duration, + mean_save_db_time_on_identifier_tasks: Duration, fetch_existing_objects_time: Duration, assign_to_existing_object_time: Duration, create_object_time: Duration, @@ -602,15 +668,17 @@ pub struct Metadata { total_found_orphans: u64, created_objects_count: u64, linked_objects_count: u64, - completed_tasks: u64, - total_tasks: u64, + total_identifier_tasks: u64, + completed_identifier_tasks: u64, + total_object_processor_tasks: u64, + completed_object_processor_tasks: u64, } impl From for Vec { fn from( Metadata { extract_metadata_time, - assign_cas_ids_time, + mean_save_db_time_on_identifier_tasks, fetch_existing_objects_time, assign_to_existing_object_time, create_object_time, @@ -618,8 +686,10 @@ impl From for Vec { total_found_orphans, created_objects_count, linked_objects_count, - completed_tasks, - total_tasks, + total_identifier_tasks, + completed_identifier_tasks, + total_object_processor_tasks, + 
completed_object_processor_tasks, }: Metadata, ) -> Self { vec![ @@ -630,7 +700,10 @@ impl From for Vec { }, ReportOutputMetadata::Metrics(HashMap::from([ ("extract_metadata_time".into(), json!(extract_metadata_time)), - ("assign_cas_ids_time".into(), json!(assign_cas_ids_time)), + ( + "mean_save_db_time_on_identifier_tasks".into(), + json!(mean_save_db_time_on_identifier_tasks), + ), ( "fetch_existing_objects_time".into(), json!(fetch_existing_objects_time), @@ -644,8 +717,22 @@ impl From for Vec { ("total_found_orphans".into(), json!(total_found_orphans)), ("created_objects_count".into(), json!(created_objects_count)), ("linked_objects_count".into(), json!(linked_objects_count)), - ("completed_tasks".into(), json!(completed_tasks)), - ("total_tasks".into(), json!(total_tasks)), + ( + "total_identifier_tasks".into(), + json!(total_identifier_tasks), + ), + ( + "completed_identifier_tasks".into(), + json!(completed_identifier_tasks), + ), + ( + "total_object_processor_tasks".into(), + json!(total_object_processor_tasks), + ), + ( + "completed_object_processor_tasks".into(), + json!(completed_object_processor_tasks), + ), ])), ] } @@ -657,8 +744,10 @@ impl SerializableJob for FileIdentifier { location, location_path, sub_path, + file_paths_accumulator, + file_paths_ids_with_priority, + phase, metadata, - priority_tasks_ids, errors, tasks_for_shutdown, .. @@ -668,22 +757,23 @@ impl SerializableJob for FileIdentifier { location, location_path, sub_path, + file_paths_accumulator, + file_paths_ids_with_priority, + phase, metadata, - priority_tasks_ids, + errors, tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named( &tasks_for_shutdown .into_iter() .map(|task| async move { - if task.is::() { + if task.is::() { SerializableTask::serialize( - *task - .downcast::() - .expect("just checked"), + *task.downcast::().expect("just checked"), ) .await - .map(|bytes| (TaskKind::ExtractFileMetadata, bytes)) - } else if task.is::() { - task.downcast::() + .map(|bytes| (TaskKind::Identifier, bytes)) + } else if task.is::() { + task.downcast::() .expect("just checked") .serialize() .await @@ -696,7 +786,6 @@ impl SerializableJob for FileIdentifier { .try_join() .await?, )?)), - errors, }) .map(Some) } @@ -709,8 +798,10 @@ impl SerializableJob for FileIdentifier { location, location_path, sub_path, + file_paths_accumulator, + file_paths_ids_with_priority, + phase, metadata, - priority_tasks_ids, errors, tasks_for_shutdown_bytes, } = rmp_serde::from_slice::(serialized_job)?; @@ -720,8 +811,10 @@ impl SerializableJob for FileIdentifier { location, location_path, sub_path, + file_paths_accumulator, + file_paths_ids_with_priority, + phase, metadata, - priority_tasks_ids, errors, pending_tasks_on_resume: Vec::new(), tasks_for_shutdown: Vec::new(), diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index 15bccde154a3..b51c413555ab 100644 --- a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -1,12 +1,20 @@ -use crate::utils::sub_path; +use crate::{utils::sub_path, OuterContext}; use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData}; +use sd_core_prisma_helpers::CasId; use sd_file_ext::{extensions::Extension, kind::ObjectKind}; use sd_prisma::prisma::{file_path, location}; +use sd_task_system::{TaskDispatcher, TaskHandle}; use sd_utils::{db::MissingFieldError, error::FileIOError}; -use std::{fs::Metadata, path::Path}; +use std::{ + 
collections::{hash_map::Entry, HashMap}, + fs::Metadata, + mem, + path::Path, + sync::Arc, +}; use prisma_client_rust::{or, QueryError}; use rspc::ErrorCode; @@ -25,6 +33,8 @@ use cas_id::generate_cas_id; pub use job::FileIdentifier; pub use shallow::shallow; +use tasks::FilePathToCreateOrLinkObject; + // we break these tasks into chunks of 100 to improve performance const CHUNK_SIZE: usize = 100; @@ -173,3 +183,90 @@ fn orphan_path_filters_deep( ], ) } + +async fn dispatch_object_processor_tasks( + file_paths_by_cas_id: Iter, + ctx: &impl OuterContext, + dispatcher: &impl TaskDispatcher, + with_priority: bool, +) -> Vec> +where + Iter: IntoIterator)> + Send, + Iter::IntoIter: Send, +{ + let mut current_batch = HashMap::<_, Vec<_>>::new(); + let mut tasks = vec![]; + + let mut current_batch_size = 0; + + for (cas_id, objects_to_create_or_link) in file_paths_by_cas_id { + if objects_to_create_or_link.len() >= CHUNK_SIZE { + tasks.push( + dispatcher + .dispatch(tasks::ObjectProcessor::new( + HashMap::from([(cas_id, objects_to_create_or_link)]), + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + with_priority, + )) + .await, + ); + } else { + current_batch_size += objects_to_create_or_link.len(); + match current_batch.entry(cas_id) { + Entry::Occupied(entry) => { + entry.into_mut().extend(objects_to_create_or_link); + } + Entry::Vacant(entry) => { + entry.insert(objects_to_create_or_link); + } + } + + if current_batch_size >= CHUNK_SIZE { + tasks.push( + dispatcher + .dispatch(tasks::ObjectProcessor::new( + mem::take(&mut current_batch), + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + with_priority, + )) + .await, + ); + + current_batch_size = 0; + } + } + } + + if !current_batch.is_empty() { + tasks.push( + dispatcher + .dispatch(tasks::ObjectProcessor::new( + current_batch, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + with_priority, + )) + .await, + ); + } + + tasks +} + +fn accumulate_file_paths_by_cas_id( + input: HashMap>, + accumulator: &mut HashMap>, +) { + for (cas_id, file_paths) in input { + match accumulator.entry(cas_id) { + Entry::<_, Vec<_>>::Occupied(entry) => { + entry.into_mut().extend(file_paths); + } + Entry::Vacant(entry) => { + entry.insert(file_paths); + } + } + } +} diff --git a/core/crates/heavy-lifting/src/file_identifier/shallow.rs b/core/crates/heavy-lifting/src/file_identifier/shallow.rs index 2faedb60077f..9543b5f0a17b 100644 --- a/core/crates/heavy-lifting/src/file_identifier/shallow.rs +++ b/core/crates/heavy-lifting/src/file_identifier/shallow.rs @@ -8,24 +8,22 @@ use sd_core_prisma_helpers::file_path_for_file_identifier; use sd_prisma::prisma::{file_path, location, SortOrder}; use sd_task_system::{ - BaseTaskDispatcher, CancelTaskOnDrop, TaskDispatcher, TaskOutput, TaskStatus, + BaseTaskDispatcher, CancelTaskOnDrop, TaskDispatcher, TaskHandle, TaskOutput, TaskStatus, }; use sd_utils::db::maybe_missing; use std::{ + collections::HashMap, path::{Path, PathBuf}, sync::Arc, }; -use futures_concurrency::future::FutureGroup; -use lending_stream::{LendingStream, StreamExt}; +use futures::{stream::FuturesUnordered, StreamExt}; use tracing::{debug, warn}; use super::{ - orphan_path_filters_shallow, - tasks::{ - extract_file_metadata, object_processor, ExtractFileMetadataTask, ObjectProcessorTask, - }, + accumulate_file_paths_by_cas_id, dispatch_object_processor_tasks, orphan_path_filters_shallow, + tasks::{self, identifier, object_processor}, CHUNK_SIZE, }; @@ -60,7 +58,7 @@ pub async fn shallow( let mut orphans_count = 0; let mut last_orphan_file_path_id = 
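// Illustrative sketch, not part of this patch: `dispatch_object_processor_tasks` above
// packs the per-cas_id groups into batches of roughly CHUNK_SIZE file paths — a group
// that is already large enough becomes its own task, while smaller groups are merged
// until the running total reaches the chunk size. The same batching reduced to a pure,
// std-only function over plain strings and ids (hypothetical types):
use std::collections::HashMap;
use std::mem;

const SKETCH_CHUNK_SIZE: usize = 100;

fn batch_by_cas_id(
    groups: impl IntoIterator<Item = (String, Vec<u64>)>,
) -> Vec<HashMap<String, Vec<u64>>> {
    let mut batches = Vec::new();
    let mut current = HashMap::<String, Vec<u64>>::new();
    let mut current_size = 0;

    for (cas_id, file_paths) in groups {
        if file_paths.len() >= SKETCH_CHUNK_SIZE {
            // Big groups go straight into their own batch.
            batches.push(HashMap::from([(cas_id, file_paths)]));
        } else {
            current_size += file_paths.len();
            current.entry(cas_id).or_default().extend(file_paths);

            if current_size >= SKETCH_CHUNK_SIZE {
                batches.push(mem::take(&mut current));
                current_size = 0;
            }
        }
    }

    if !current.is_empty() {
        batches.push(current);
    }

    batches
}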
None; - let mut pending_running_tasks = FutureGroup::new(); + let mut identifier_tasks = vec![]; loop { #[allow(clippy::cast_possible_wrap)] @@ -87,16 +85,18 @@ pub async fn shallow( orphans_count += orphan_paths.len() as u64; last_orphan_file_path_id = Some(last_orphan.id); - pending_running_tasks.insert(CancelTaskOnDrop::new( + identifier_tasks.push( dispatcher - .dispatch(ExtractFileMetadataTask::new( + .dispatch(tasks::Identifier::new( Arc::clone(&location), Arc::clone(&location_path), orphan_paths, true, + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), )) .await, - )); + ); } if orphans_count == 0 { @@ -108,49 +108,66 @@ pub async fn shallow( return Ok(vec![]); } - let errors = process_tasks(pending_running_tasks, dispatcher, ctx).await?; + let errors = process_tasks(identifier_tasks, dispatcher, ctx).await?; Ok(errors) } async fn process_tasks( - pending_running_tasks: FutureGroup>, + identifier_tasks: Vec>, dispatcher: &BaseTaskDispatcher, ctx: &impl OuterContext, ) -> Result, Error> { - let mut pending_running_tasks = pending_running_tasks.lend_mut(); + let total_identifier_tasks = identifier_tasks.len(); - let db = ctx.db(); - let sync = ctx.sync(); + let mut pending_running_tasks = identifier_tasks + .into_iter() + .map(CancelTaskOnDrop::new) + .collect::>(); let mut errors = vec![]; + let mut completed_identifier_tasks = 0; + let mut file_paths_accumulator = HashMap::new(); - while let Some((pending_running_tasks, task_result)) = pending_running_tasks.next().await { + while let Some(task_result) = pending_running_tasks.next().await { match task_result { Ok(TaskStatus::Done((_, TaskOutput::Out(any_task_output)))) => { // We only care about ExtractFileMetadataTaskOutput because we need to dispatch further tasks // and the ObjectProcessorTask only gives back some metrics not much important for // shallow file identifier - if any_task_output.is::() { - let extract_file_metadata::Output { - identified_files, + if any_task_output.is::() { + let identifier::Output { + file_path_ids_with_new_object, + file_paths_by_cas_id, errors: more_errors, .. 
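// Illustrative sketch, not part of this patch: the shallow identifier above drains its
// task handles through a `FuturesUnordered` and, once the last identifier task has
// finished, extends the *same* stream with the follow-up object-processor tasks. The
// drain-then-extend shape, assuming only the `futures` crate and using trivial futures
// as stand-ins for real task handles (hypothetical names):
use futures::{executor::block_on, stream::FuturesUnordered, StreamExt};

async fn fake_task(n: u32) -> u32 {
    n
}

fn drain_and_extend() -> Vec<u32> {
    block_on(async {
        // First-phase "tasks".
        let mut pending: FuturesUnordered<_> = (0..3).map(fake_task).collect();

        let (total_first_phase, mut completed, mut results) = (3, 0, Vec::new());
        while let Some(n) = pending.next().await {
            results.push(n);
            if n < 100 {
                completed += 1;
                if completed == total_first_phase {
                    // Last first-phase task finished: push the follow-up work onto the
                    // same stream so this single loop drives it too.
                    pending.extend((0..2).map(|m| fake_task(100 + m)));
                }
            }
        }
        results
    })
}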
} = *any_task_output.downcast().expect("just checked"); + completed_identifier_tasks += 1; + + ctx.report_update(crate::UpdateEvent::NewIdentifiedObjects { + file_path_ids: file_path_ids_with_new_object, + }); + + accumulate_file_paths_by_cas_id( + file_paths_by_cas_id, + &mut file_paths_accumulator, + ); + errors.extend(more_errors); - if !identified_files.is_empty() { - pending_running_tasks.insert(CancelTaskOnDrop::new( - dispatcher - .dispatch(ObjectProcessorTask::new( - identified_files, - Arc::clone(db), - Arc::clone(sync), - true, - )) - .await, - )); + if total_identifier_tasks == completed_identifier_tasks { + pending_running_tasks.extend( + dispatch_object_processor_tasks( + file_paths_accumulator.drain(), + ctx, + dispatcher, + true, + ) + .await + .into_iter() + .map(CancelTaskOnDrop::new), + ); } } else { let object_processor::Output { diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs similarity index 84% rename from core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs rename to core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs index d6560ed0fad8..2154efda64e2 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/extract_file_metadata.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs @@ -4,9 +4,7 @@ use crate::{ }; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::{ - file_path_for_file_identifier, file_path_to_create_object, CasId, FilePathPubId, -}; +use sd_core_prisma_helpers::{file_path_for_file_identifier, CasId, FilePathPubId}; use sd_core_sync::Manager as SyncManager; use sd_file_ext::kind::ObjectKind; @@ -31,35 +29,65 @@ use serde::{Deserialize, Serialize}; use tokio::time::Instant; use tracing::{error, instrument, trace, Level}; -use super::{create_objects, IdentifiedFile, ObjectToCreateOrLink}; +use super::{create_objects_and_update_file_paths, FilePathToCreateOrLinkObject}; + +#[derive(Debug, Serialize, Deserialize)] +struct IdentifiedFile { + file_path: file_path_for_file_identifier::Data, + cas_id: CasId, + kind: ObjectKind, +} + +impl IdentifiedFile { + pub fn new( + file_path: file_path_for_file_identifier::Data, + cas_id: impl Into, + kind: ObjectKind, + ) -> Self { + Self { + file_path, + cas_id: cas_id.into(), + kind, + } + } +} #[derive(Debug)] -pub struct ExtractFileMetadataTask { +pub struct Identifier { + // Task control id: TaskId, + with_priority: bool, + + // Received args location: Arc, location_path: Arc, file_paths_by_id: HashMap, + + // Inner state identified_files: HashMap, - file_paths_without_cas_id: Vec<(file_path_to_create_object::Data, ObjectKind)>, - with_priority: bool, + file_paths_without_cas_id: Vec, + // Out output: Output, + // Dependencies db: Arc, sync: Arc, } #[derive(Debug, Default, Serialize, Deserialize)] pub struct Output { - pub file_path_pub_ids_and_kinds_by_cas_id: HashMap>, + pub file_path_ids_with_new_object: Vec, + pub file_paths_by_cas_id: HashMap>, pub extract_metadata_time: Duration, pub save_db_time: Duration, pub created_objects_count: u64, + pub total_identified_files: u64, pub errors: Vec, } #[async_trait::async_trait] -impl Task for ExtractFileMetadataTask { +impl Task for Identifier { fn id(&self) -> TaskId { self.id } @@ -155,17 +183,17 @@ impl Task for ExtractFileMetadataTask { cas_id: None, kind, .. }) => { let file_path_for_file_identifier::Data { + id, pub_id, date_created, .. 
} = file_path; - file_paths_without_cas_id.push(( - file_path_to_create_object::Data { - pub_id, - date_created, - }, + file_paths_without_cas_id.push(FilePathToCreateOrLinkObject { + id, + file_path_pub_id: pub_id.into(), kind, - )); + created_at: date_created, + }); } Err(e) => { handle_non_critical_errors( @@ -197,22 +225,31 @@ impl Task for ExtractFileMetadataTask { output.extract_metadata_time = start_time.elapsed(); + output.total_identified_files = + identified_files.len() as u64 + file_paths_without_cas_id.len() as u64; + trace!( identified_files_count = identified_files.len(), "All files have been processed, saving cas_ids to db..." ); let start_time = Instant::now(); // Assign cas_id to each file path - let (_, created) = ( + let ((), file_path_ids_with_new_object) = ( assign_cas_id_to_file_paths(identified_files, &self.db, &self.sync), - create_objects(file_paths_without_cas_id.iter(), &self.db, &self.sync), + create_objects_and_update_file_paths( + file_paths_without_cas_id.drain(..), + &self.db, + &self.sync, + ), ) .try_join() .await?; output.save_db_time = start_time.elapsed(); - output.created_objects_count = created; - output.file_path_pub_ids_and_kinds_by_cas_id = identified_files.drain().fold( + output.created_objects_count = file_path_ids_with_new_object.len() as u64; + output.file_path_ids_with_new_object = file_path_ids_with_new_object; + + output.file_paths_by_cas_id = identified_files.drain().fold( HashMap::new(), |mut map, ( @@ -220,15 +257,19 @@ impl Task for ExtractFileMetadataTask { IdentifiedFile { cas_id, kind, - file_path, + file_path: + file_path_for_file_identifier::Data { + id, date_created, .. + }, }, )| { map.entry(cas_id) .or_insert_with(|| Vec::with_capacity(1)) - .push(ObjectToCreateOrLink { + .push(FilePathToCreateOrLinkObject { + id, file_path_pub_id, kind, - created_at: file_path.date_created, + created_at: date_created, }); map @@ -242,7 +283,7 @@ impl Task for ExtractFileMetadataTask { } } -impl ExtractFileMetadataTask { +impl Identifier { #[must_use] pub fn new( location: Arc, @@ -384,12 +425,12 @@ struct SaveState { location_path: Arc, file_paths_by_id: HashMap, identified_files: HashMap, - file_paths_without_cas_id: Vec<(file_path_to_create_object::Data, ObjectKind)>, + file_paths_without_cas_id: Vec, output: Output, with_priority: bool, } -impl SerializableTask for ExtractFileMetadataTask { +impl SerializableTask for Identifier { type SerializeError = rmp_serde::encode::Error; type DeserializeError = rmp_serde::decode::Error; @@ -436,13 +477,13 @@ impl SerializableTask for ExtractFileMetadataTask { with_priority, }| Self { id, + with_priority, location, location_path, file_paths_by_id, identified_files, file_paths_without_cas_id, output, - with_priority, db, sync, }, diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs index c1f6db1e668f..8f3f65216f01 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs @@ -1,9 +1,6 @@ use crate::file_identifier; -use chrono::{DateTime, FixedOffset}; -use sd_core_prisma_helpers::{ - file_path_for_file_identifier, file_path_to_create_object, CasId, FilePathPubId, ObjectPubId, -}; +use sd_core_prisma_helpers::{file_path_id, FilePathPubId, ObjectPubId}; use sd_core_sync::Manager as SyncManager; use sd_file_ext::kind::ObjectKind; @@ -11,72 +8,84 @@ use sd_prisma::{ prisma::{file_path, object, PrismaClient}, prisma_sync, }; -use 
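// Illustrative sketch, not part of this patch: the identifier task above folds its
// per-file results into a map keyed by cas_id, so every file path sharing a content
// hash lands in the same bucket for the object processor. That grouping, reduced to a
// std-only helper with hypothetical key/value types:
use std::collections::HashMap;

fn group_by_cas_id<K, V>(entries: impl IntoIterator<Item = (K, V)>) -> HashMap<K, Vec<V>>
where
    K: std::hash::Hash + Eq,
{
    entries
        .into_iter()
        .fold(HashMap::new(), |mut map, (cas_id, file_path)| {
            map.entry(cas_id)
                .or_insert_with(|| Vec::with_capacity(1))
                .push(file_path);
            map
        })
}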
sd_sync::OperationFactory; +use sd_sync::{CRDTOperation, OperationFactory}; use sd_utils::msgpack; +use chrono::{DateTime, FixedOffset}; +use prisma_client_rust::Select; use serde::{Deserialize, Serialize}; use tracing::{instrument, trace, Level}; -pub mod extract_file_metadata; +pub mod identifier; pub mod object_processor; -pub use extract_file_metadata::ExtractFileMetadataTask; -pub use object_processor::ObjectProcessorTask; +pub use identifier::Identifier; +pub use object_processor::ObjectProcessor; #[derive(Debug, Serialize, Deserialize)] -pub(super) struct IdentifiedFile { - file_path: file_path_for_file_identifier::Data, - cas_id: CasId, +pub(super) struct FilePathToCreateOrLinkObject { + id: file_path::id::Type, + file_path_pub_id: FilePathPubId, kind: ObjectKind, + created_at: Option>, } -impl IdentifiedFile { - pub fn new( - file_path: file_path_for_file_identifier::Data, - cas_id: impl Into, - kind: ObjectKind, - ) -> Self { - Self { - file_path, - cas_id: cas_id.into(), - kind, - } - } -} +#[instrument(skip(sync, db))] +fn connect_file_path_to_object<'db>( + file_path_pub_id: &FilePathPubId, + object_pub_id: &ObjectPubId, + db: &'db PrismaClient, + sync: &SyncManager, +) -> (CRDTOperation, Select<'db, file_path_id::Data>) { + trace!("Connecting"); -#[derive(Debug, Serialize, Deserialize)] -struct ObjectToCreateOrLink { - file_path_pub_id: FilePathPubId, - kind: ObjectKind, - created_at: Option>, + ( + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: file_path_pub_id.to_db(), + }, + file_path::object::NAME, + msgpack!(prisma_sync::object::SyncId { + pub_id: object_pub_id.to_db(), + }), + ), + db.file_path() + .update( + file_path::pub_id::equals(file_path_pub_id.to_db()), + vec![file_path::object::connect(object::pub_id::equals( + object_pub_id.to_db(), + ))], + ) + // selecting just id to avoid fetching the whole object + .select(file_path_id::select()), + ) } #[instrument(skip_all, ret(level = Level::TRACE), err)] -async fn create_objects( - files_and_kinds: impl IntoIterator, +async fn create_objects_and_update_file_paths( + files_and_kinds: impl IntoIterator + Send, db: &PrismaClient, sync: &SyncManager, -) -> Result { +) -> Result, file_identifier::Error> { trace!("Creating new Objects!"); let (object_create_args, file_path_update_args) = files_and_kinds .into_iter() .map( - |( - file_path_to_create_object::Data { - pub_id: file_path_pub_id, - date_created, - }, - kind, - )| { + |FilePathToCreateOrLinkObject { + file_path_pub_id, + created_at, + kind, + .. 
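// Illustrative sketch, not part of this patch: `connect_file_path_to_object` and
// `create_objects_and_update_file_paths` above build a (sync operation, db query) pair
// per record and then `unzip` the pairs so both halves can be submitted together in one
// write. The shape of that pairing with std-only placeholder types (hypothetical names,
// not the real sd_sync or Prisma operations):
struct SyncOp(String);
struct DbOp(String);

fn link_record(record_id: u64, object_id: u64) -> (SyncOp, DbOp) {
    (
        SyncOp(format!("shared_update file_path {record_id} -> object {object_id}")),
        DbOp(format!("UPDATE file_path SET object_id = {object_id} WHERE id = {record_id}")),
    )
}

fn build_write_batch(links: &[(u64, u64)]) -> (Vec<SyncOp>, Vec<DbOp>) {
    links
        .iter()
        .map(|&(record_id, object_id)| link_record(record_id, object_id))
        // One pass produces both halves, keeping sync ops and db ops index-aligned.
        .unzip()
}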
+ }| { let object_pub_id = ObjectPubId::new(); - let kind = *kind as i32; + let kind = kind as i32; let (sync_params, db_params) = [ ( - (object::date_created::NAME, msgpack!(date_created)), - object::date_created::set(*date_created), + (object::date_created::NAME, msgpack!(created_at)), + object::date_created::set(created_at), ), ( (object::kind::NAME, msgpack!(kind)), @@ -96,33 +105,14 @@ async fn create_objects( ), object::create_unchecked(object_pub_id.to_db(), db_params), ), - ( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: file_path_pub_id.clone(), - }, - file_path::object::NAME, - msgpack!(prisma_sync::object::SyncId { - pub_id: object_pub_id.to_db() - }), - ), - db.file_path() - .update( - file_path::pub_id::equals(file_path_pub_id.clone()), - vec![file_path::object::connect(object::pub_id::equals( - object_pub_id.into(), - ))], - ) - // selecting just id to avoid fetching the whole object - .select(file_path::select!({ id })), - ), + connect_file_path_to_object(&file_path_pub_id, &object_pub_id, db, sync), ) }, ) .unzip::<_, _, Vec<_>, Vec<_>>(); // create new object records with assembled values - let total_created_files = sync + let created_objects_count = sync .write_ops(db, { let (sync, db_params) = object_create_args .into_iter() @@ -135,9 +125,9 @@ async fn create_objects( }) .await?; - trace!(%total_created_files, "Created new Objects"); + trace!(%created_objects_count, "Created new Objects"); - if total_created_files > 0 { + if created_objects_count > 0 { trace!("Updating file paths with created objects"); sync.write_ops( @@ -146,11 +136,16 @@ async fn create_objects( .into_iter() .unzip::<_, _, Vec<_>, Vec<_>>(), ) - .await?; - - trace!("Updated file paths with created objects"); + .await + .map(|file_paths| { + file_paths + .into_iter() + .map(|file_path_id::Data { id }| id) + .collect() + }) + .map_err(Into::into) + } else { + trace!("No objects created, skipping file path updates"); + Ok(vec![]) } - - #[allow(clippy::cast_sign_loss)] // SAFETY: We're sure the value is positive - Ok(total_created_files as u64) } diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs index b3cb1343bec6..8e412d3475d7 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs @@ -1,98 +1,59 @@ use crate::{file_identifier, Error}; -use sd_core_prisma_helpers::{ - file_path_for_file_identifier, file_path_pub_id, object_for_file_identifier, -}; +use sd_core_prisma_helpers::{object_for_file_identifier, CasId, ObjectPubId}; use sd_core_sync::Manager as SyncManager; -use sd_prisma::{ - prisma::{file_path, object, PrismaClient}, - prisma_sync, -}; -use sd_sync::{CRDTOperation, OperationFactory}; +use sd_prisma::prisma::{file_path, object, PrismaClient}; use sd_task_system::{ check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, SerializableTask, Task, TaskId, }; -use sd_utils::{msgpack, uuid_to_bytes}; -use std::{ - collections::{HashMap, HashSet}, - mem, - sync::Arc, - time::Duration, -}; +use std::{collections::HashMap, mem, sync::Arc, time::Duration}; -use prisma_client_rust::Select; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::trace; -use uuid::Uuid; +use tracing::{instrument, trace, Level}; -use super::IdentifiedFile; +use super::{ + connect_file_path_to_object, create_objects_and_update_file_paths, 
FilePathToCreateOrLinkObject, +}; #[derive(Debug)] -pub struct ObjectProcessorTask { +pub struct ObjectProcessor { id: TaskId, - db: Arc, - sync: Arc, - identified_files: HashMap, - output: Output, + file_paths_by_cas_id: HashMap>, + stage: Stage, - with_priority: bool, -} -#[derive(Debug, Serialize, Deserialize)] -pub struct SaveState { - id: TaskId, - identified_files: HashMap, output: Output, - stage: Stage, + with_priority: bool, -} -#[derive(Debug, Serialize, Deserialize, Default)] -pub struct Output { - pub file_path_ids_with_new_object: Vec, - pub assign_cas_ids_time: Duration, - pub fetch_existing_objects_time: Duration, - pub assign_to_existing_object_time: Duration, - pub create_object_time: Duration, - pub created_objects_count: u64, - pub linked_objects_count: u64, + db: Arc, + sync: Arc, } #[derive(Debug, Serialize, Deserialize)] enum Stage { Starting, - FetchExistingObjects, AssignFilePathsToExistingObjects { - existing_objects_by_cas_id: HashMap, + existing_objects_by_cas_id: HashMap, }, CreateObjects, } -impl ObjectProcessorTask { - #[must_use] - pub fn new( - identified_files: HashMap, - db: Arc, - sync: Arc, - with_priority: bool, - ) -> Self { - Self { - id: TaskId::new_v4(), - db, - sync, - identified_files, - stage: Stage::Starting, - output: Output::default(), - with_priority, - } - } +#[derive(Debug, Serialize, Deserialize, Default)] +pub struct Output { + pub file_path_ids_with_new_object: Vec, + pub fetch_existing_objects_time: Duration, + pub assign_to_existing_object_time: Duration, + pub create_object_time: Duration, + pub created_objects_count: u64, + pub linked_objects_count: u64, } #[async_trait::async_trait] -impl Task for ObjectProcessorTask { +impl Task for ObjectProcessor { fn id(&self) -> TaskId { self.id } @@ -101,16 +62,25 @@ impl Task for ObjectProcessorTask { self.with_priority } + #[instrument( + skip(self, interrupter), + fields( + task_id = %self.id, + cas_ids_count = %self.file_paths_by_cas_id.len(), + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, interrupter: &Interrupter) -> Result { let Self { db, sync, - identified_files, + file_paths_by_cas_id, stage, output: Output { file_path_ids_with_new_object, - assign_cas_ids_time, fetch_existing_objects_time, assign_to_existing_object_time, create_object_time, @@ -123,17 +93,17 @@ impl Task for ObjectProcessorTask { loop { match stage { Stage::Starting => { - let start = Instant::now(); - assign_cas_id_to_file_paths(identified_files, db, sync).await?; - *assign_cas_ids_time = start.elapsed(); - *stage = Stage::FetchExistingObjects; - } - - Stage::FetchExistingObjects => { + trace!("Starting object processor task"); let start = Instant::now(); let existing_objects_by_cas_id = - fetch_existing_objects_by_cas_id(identified_files, db).await?; + fetch_existing_objects_by_cas_id(file_paths_by_cas_id.keys(), db).await?; *fetch_existing_objects_time = start.elapsed(); + + trace!( + elapsed_time = ?fetch_existing_objects_time, + existing_objects_count = existing_objects_by_cas_id.len(), + "Fetched existing Objects", + ); *stage = Stage::AssignFilePathsToExistingObjects { existing_objects_by_cas_id, }; @@ -142,48 +112,49 @@ impl Task for ObjectProcessorTask { Stage::AssignFilePathsToExistingObjects { existing_objects_by_cas_id, } => { + trace!("Assigning file paths to existing Objects"); let start = Instant::now(); - let assigned_file_path_pub_ids = assign_existing_objects_to_file_paths( - 
identified_files, + *linked_objects_count = assign_existing_objects_to_file_paths( + file_paths_by_cas_id, existing_objects_by_cas_id, db, sync, ) .await?; *assign_to_existing_object_time = start.elapsed(); - *linked_objects_count = assigned_file_path_pub_ids.len() as u64; trace!( - "Found {} existing Objects, linked file paths to them", - existing_objects_by_cas_id.len() + existing_objects_to_link = existing_objects_by_cas_id.len(), + %linked_objects_count, + "Found existing Objects, linked file paths to them", ); - for file_path_pub_id::Data { pub_id } in assigned_file_path_pub_ids { - let pub_id = Uuid::from_slice(&pub_id).expect("uuid bytes are invalid"); - trace!("Assigned file path to existing object"); - - identified_files - .remove(&pub_id) - .expect("file_path must be here"); - } - *stage = Stage::CreateObjects; - if identified_files.is_empty() { + if file_paths_by_cas_id.is_empty() { + trace!("No more objects to be created, finishing task"); // No objects to be created, we're good to finish already break; } } Stage::CreateObjects => { + trace!( + creating_count = file_paths_by_cas_id.len(), + "Creating new Objects" + ); let start = Instant::now(); - *created_objects_count = create_objects(identified_files, db, sync).await?; + *file_path_ids_with_new_object = create_objects_and_update_file_paths( + mem::take(file_paths_by_cas_id).into_values().flatten(), + db, + sync, + ) + .await?; *create_object_time = start.elapsed(); - *file_path_ids_with_new_object = identified_files - .values() - .map(|IdentifiedFile { file_path, .. }| file_path.id) - .collect(); + *created_objects_count = file_path_ids_with_new_object.len() as u64; + + trace!(%created_objects_count, ?create_object_time, "Created new Objects"); break; } @@ -196,225 +167,114 @@ impl Task for ObjectProcessorTask { } } -async fn assign_cas_id_to_file_paths( - identified_files: &HashMap, - db: &PrismaClient, - sync: &SyncManager, -) -> Result<(), file_identifier::Error> { - // Assign cas_id to each file path - sync.write_ops( - db, - identified_files - .iter() - .map(|(pub_id, IdentifiedFile { cas_id, .. })| { - ( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: uuid_to_bytes(*pub_id), - }, - file_path::cas_id::NAME, - msgpack!(cas_id), - ), - db.file_path() - .update( - file_path::pub_id::equals(uuid_to_bytes(*pub_id)), - vec![file_path::cas_id::set(cas_id.clone())], - ) - // We don't need any data here, just the id avoids receiving the entire object - // as we can't pass an empty select macro call - .select(file_path::select!({ id })), - ) - }) - .unzip::<_, _, _, Vec<_>>(), - ) - .await?; - - Ok(()) +impl ObjectProcessor { + #[must_use] + pub fn new( + file_paths_by_cas_id: HashMap>, + db: Arc, + sync: Arc, + with_priority: bool, + ) -> Self { + Self { + id: TaskId::new_v4(), + db, + sync, + file_paths_by_cas_id, + stage: Stage::Starting, + output: Output::default(), + with_priority, + } + } } -async fn fetch_existing_objects_by_cas_id( - identified_files: &HashMap, +/// Retrieves objects that are already connected to file paths with the same cas_id +#[instrument(skip_all, err)] +async fn fetch_existing_objects_by_cas_id<'cas_id, Iter>( + cas_ids: Iter, db: &PrismaClient, -) -> Result, file_identifier::Error> { - // Retrieves objects that are already connected to file paths with the same id - db.object() - .find_many(vec![object::file_paths::some(vec![ - file_path::cas_id::in_vec( - identified_files - .values() - .filter_map(|IdentifiedFile { cas_id, .. 
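// Illustrative sketch, not part of this patch: the object processor above runs as a
// small state machine — each pass of the loop handles one `Stage`, and the interruption
// check between stages lets the task pause and later resume exactly where it stopped,
// because the stage is part of its serialized state. A std-only skeleton of that control
// flow (hypothetical names; the real task awaits its interrupter instead of calling a
// closure, and each arm here stands in for that stage's database work):
enum Step {
    FetchExisting,
    LinkExisting,
    CreateMissing,
    Done,
}

struct SketchTask {
    step: Step,
}

enum RunOutcome {
    Paused,
    Completed,
}

impl SketchTask {
    fn run(&mut self, interrupted: impl Fn() -> bool) -> RunOutcome {
        loop {
            self.step = match self.step {
                Step::FetchExisting => Step::LinkExisting,
                Step::LinkExisting => Step::CreateMissing,
                Step::CreateMissing => Step::Done,
                Step::Done => return RunOutcome::Completed,
            };
            if interrupted() {
                // The current step survives, so a later `run` continues from here.
                return RunOutcome::Paused;
            }
        }
    }
}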
}| cas_id.as_ref()) - .cloned() - .collect::>() +) -> Result, file_identifier::Error> +where + Iter: IntoIterator + Send, + Iter::IntoIter: Send, +{ + async fn inner( + stringed_cas_ids: Vec, + db: &PrismaClient, + ) -> Result, file_identifier::Error> { + db.object() + .find_many(vec![object::file_paths::some(vec![ + file_path::cas_id::in_vec(stringed_cas_ids), + file_path::object_id::not(None), + ])]) + .select(object_for_file_identifier::select()) + .exec() + .await + .map_err(Into::into) + .map(|objects| { + objects .into_iter() - .collect(), - ), - ])]) - .select(object_for_file_identifier::select()) - .exec() - .await - .map_err(Into::into) - .map(|objects| { - objects - .into_iter() - .filter_map(|object| { - object - .file_paths - .first() - .and_then(|file_path| file_path.cas_id.clone()) - .map(|cas_id| (cas_id, object)) - }) - .collect() - }) + .filter_map(|object_for_file_identifier::Data { pub_id, file_paths }| { + file_paths + .first() + .and_then(|file_path| file_path.cas_id.as_ref()) + .map(|cas_id| (cas_id.into(), pub_id.into())) + }) + .collect() + }) + } + + inner(cas_ids.into_iter().map(Into::into).collect::>(), db).await } +/// Attempt to associate each file path with an object that has been +/// connected to file paths with the same cas_id +#[instrument(skip_all, err, fields(identified_files_count = file_paths_by_cas_id.len()))] async fn assign_existing_objects_to_file_paths( - identified_files: &HashMap, - objects_by_cas_id: &HashMap, + file_paths_by_cas_id: &mut HashMap>, + objects_by_cas_id: &HashMap, db: &PrismaClient, sync: &SyncManager, -) -> Result, file_identifier::Error> { - // Attempt to associate each file path with an object that has been - // connected to file paths with the same cas_id +) -> Result { sync.write_ops( db, - identified_files + objects_by_cas_id .iter() - .filter_map(|(pub_id, IdentifiedFile { cas_id, .. })| { - objects_by_cas_id - // Filtering out files without cas_id due to being empty - .get(cas_id.as_ref()?) - .map(|object| (*pub_id, object)) - }) - .map(|(pub_id, object)| { - connect_file_path_to_object( - pub_id, - // SAFETY: This pub_id is generated by the uuid lib, but we have to store bytes in sqlite - Uuid::from_slice(&object.pub_id).expect("uuid bytes are invalid"), - sync, - db, - ) + .flat_map(|(cas_id, object_pub_id)| { + file_paths_by_cas_id + .remove(cas_id) + .map(|file_paths| { + file_paths.into_iter().map( + |FilePathToCreateOrLinkObject { + file_path_pub_id, .. 
+ }| { + connect_file_path_to_object( + &file_path_pub_id, + object_pub_id, + db, + sync, + ) + }, + ) + }) + .expect("must be here") }) .unzip::<_, _, Vec<_>, Vec<_>>(), ) .await + .map(|file_paths| file_paths.len() as u64) .map_err(Into::into) } -fn connect_file_path_to_object<'db>( - file_path_pub_id: Uuid, - object_pub_id: Uuid, - sync: &SyncManager, - db: &'db PrismaClient, -) -> (CRDTOperation, Select<'db, file_path_pub_id::Data>) { - trace!("Connecting to "); - - let vec_id = object_pub_id.as_bytes().to_vec(); - - ( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: uuid_to_bytes(file_path_pub_id), - }, - file_path::object::NAME, - msgpack!(prisma_sync::object::SyncId { - pub_id: vec_id.clone() - }), - ), - db.file_path() - .update( - file_path::pub_id::equals(uuid_to_bytes(file_path_pub_id)), - vec![file_path::object::connect(object::pub_id::equals(vec_id))], - ) - .select(file_path_pub_id::select()), - ) -} - -async fn create_objects( - identified_files: &HashMap, - db: &PrismaClient, - sync: &SyncManager, -) -> Result { - trace!("Creating {} new Objects", identified_files.len(),); - - let (object_create_args, file_path_update_args) = identified_files - .iter() - .map( - |( - file_path_pub_id, - IdentifiedFile { - file_path: file_path_for_file_identifier::Data { date_created, .. }, - kind, - .. - }, - )| { - let object_pub_id = Uuid::new_v4(); - - let kind = *kind as i32; - - let (sync_params, db_params) = [ - ( - (object::date_created::NAME, msgpack!(date_created)), - object::date_created::set(*date_created), - ), - ( - (object::kind::NAME, msgpack!(kind)), - object::kind::set(Some(kind)), - ), - ] - .into_iter() - .unzip::<_, _, Vec<_>, Vec<_>>(); - - ( - ( - sync.shared_create( - prisma_sync::object::SyncId { - pub_id: uuid_to_bytes(object_pub_id), - }, - sync_params, - ), - object::create_unchecked(uuid_to_bytes(object_pub_id), db_params), - ), - connect_file_path_to_object(*file_path_pub_id, object_pub_id, sync, db), - ) - }, - ) - .unzip::<_, _, Vec<_>, Vec<_>>(); - - // create new object records with assembled values - let total_created_files = sync - .write_ops(db, { - let (sync, db_params) = object_create_args - .into_iter() - .unzip::<_, _, Vec<_>, Vec<_>>(); - - ( - sync.into_iter().flatten().collect(), - db.object().create_many(db_params), - ) - }) - .await?; - - trace!("Created {total_created_files} new Objects"); - - if total_created_files > 0 { - trace!("Updating file paths with created objects"); - - sync.write_ops( - db, - file_path_update_args - .into_iter() - .unzip::<_, _, Vec<_>, Vec<_>>(), - ) - .await?; - - trace!("Updated file paths with created objects"); - } - - #[allow(clippy::cast_sign_loss)] // SAFETY: We're sure the value is positive - Ok(total_created_files as u64) +#[derive(Debug, Serialize, Deserialize)] +pub struct SaveState { + id: TaskId, + file_paths_by_cas_id: HashMap>, + stage: Stage, + output: Output, + with_priority: bool, } -impl SerializableTask for ObjectProcessorTask { +impl SerializableTask for ObjectProcessor { type SerializeError = rmp_serde::encode::Error; type DeserializeError = rmp_serde::decode::Error; @@ -424,18 +284,18 @@ impl SerializableTask for ObjectProcessorTask { async fn serialize(self) -> Result, Self::SerializeError> { let Self { id, - identified_files, - output, + file_paths_by_cas_id, stage, + output, with_priority, .. 
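// Illustrative sketch, not part of this patch: both tasks above serialize through a
// `SaveState` struct that keeps only resumable data — the `Arc<PrismaClient>` and
// `Arc<SyncManager>` handles are dropped on save and re-injected on load. The same
// split with std-only placeholder types (hypothetical names):
use std::sync::Arc;

struct Database; // stands in for the shared runtime dependencies

struct SketchObjectProcessor {
    pending_ids: Vec<u64>,
    with_priority: bool,
    db: Arc<Database>,
}

struct SketchSaveState {
    pending_ids: Vec<u64>,
    with_priority: bool,
}

impl SketchObjectProcessor {
    fn into_save_state(self) -> SketchSaveState {
        // Runtime handles are intentionally left behind here.
        let Self { pending_ids, with_priority, .. } = self;
        SketchSaveState { pending_ids, with_priority }
    }

    fn from_save_state(state: SketchSaveState, db: Arc<Database>) -> Self {
        let SketchSaveState { pending_ids, with_priority } = state;
        Self { pending_ids, with_priority, db }
    }
}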
} = self; rmp_serde::to_vec_named(&SaveState { id, - identified_files, - output, + file_paths_by_cas_id, stage, + output, with_priority, }) } @@ -447,18 +307,18 @@ impl SerializableTask for ObjectProcessorTask { rmp_serde::from_slice(data).map( |SaveState { id, - identified_files, - output, + file_paths_by_cas_id, stage, + output, with_priority, }| Self { id, - db, - sync, - identified_files, - output, + file_paths_by_cas_id, stage, + output, with_priority, + db, + sync, }, ) } diff --git a/core/crates/heavy-lifting/src/indexer/mod.rs b/core/crates/heavy-lifting/src/indexer/mod.rs index 8d6ad4e3d9fb..a842934715b1 100644 --- a/core/crates/heavy-lifting/src/indexer/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/mod.rs @@ -347,7 +347,8 @@ async fn compute_sizes( db: &PrismaClient, errors: &mut Vec, ) -> Result<(), QueryError> { - db.file_path() + for file_path in db + .file_path() .find_many(vec![ file_path::location_id::equals(Some(location_id)), file_path::materialized_path::in_vec(materialized_paths), @@ -355,30 +356,29 @@ async fn compute_sizes( .select(file_path::select!({ pub_id materialized_path size_in_bytes_bytes })) .exec() .await? - .into_iter() - .for_each(|file_path| { - if let Some(materialized_path) = file_path.materialized_path { - if let Some((_, size)) = - pub_id_by_ancestor_materialized_path.get_mut(&materialized_path) - { - *size += file_path.size_in_bytes_bytes.map_or_else( - || { - warn!("Got a directory missing its size in bytes"); - 0 - }, - |size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes), - ); - } - } else { - errors.push( - NonCriticalIndexerError::MissingFilePathData(format!( + { + if let Some(materialized_path) = file_path.materialized_path { + if let Some((_, size)) = + pub_id_by_ancestor_materialized_path.get_mut(&materialized_path) + { + *size += file_path.size_in_bytes_bytes.map_or_else( + || { + warn!("Got a directory missing its size in bytes"); + 0 + }, + |size_in_bytes_bytes| size_in_bytes_from_db(&size_in_bytes_bytes), + ); + } + } else { + errors.push( + NonCriticalIndexerError::MissingFilePathData(format!( "Corrupt database possessing a file_path entry without materialized_path: ", from_bytes_to_uuid(&file_path.pub_id) )) - .into(), - ); - } - }); + .into(), + ); + } + } Ok(()) } diff --git a/core/crates/heavy-lifting/src/indexer/tasks/saver.rs b/core/crates/heavy-lifting/src/indexer/tasks/saver.rs index 3c97b01ad76e..fae1c901271f 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/saver.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/saver.rs @@ -165,63 +165,74 @@ impl Task for SaveTask { let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked_entries .drain(..) - .map(|entry| { - let IsolatedFilePathDataParts { - materialized_path, - is_dir, - name, - extension, - .. - } = entry.iso_file_path.to_parts(); - - let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); - - let (sync_params, db_params): (Vec<_>, Vec<_>) = [ - ( + .map( + |WalkedEntry { + pub_id, + maybe_object_id, + iso_file_path, + metadata, + }| { + let IsolatedFilePathDataParts { + materialized_path, + is_dir, + name, + extension, + .. 
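// Illustrative sketch, not part of this patch: `compute_sizes` above swaps an iterator
// `for_each` for a plain `for` loop — the body mutates two captured collections (the
// per-directory size accumulator and the error list), which reads more naturally as a
// loop than as a closure. The same shape with std-only types (hypothetical names):
use std::collections::HashMap;

fn accumulate_sizes(
    rows: Vec<(String, Option<u64>)>, // (materialized_path, size) as a query might return
    sizes_by_path: &mut HashMap<String, u64>,
    errors: &mut Vec<String>,
) {
    for (path, maybe_size) in rows {
        if let Some(total) = sizes_by_path.get_mut(&path) {
            *total += maybe_size.unwrap_or_else(|| {
                errors.push(format!("directory '{path}' is missing its size in bytes"));
                0
            });
        }
    }
}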
+ } = iso_file_path.to_parts(); + + assert!( + maybe_object_id.is_none(), + "Object ID must be None as this tasks only created \ + new file_paths and they were not identified yet" + ); + + let (sync_params, db_params): (Vec<_>, Vec<_>) = [ ( - location::NAME, - msgpack!(prisma_sync::location::SyncId { - pub_id: location_pub_id.clone() - }), + ( + location::NAME, + msgpack!(prisma_sync::location::SyncId { + pub_id: location_pub_id.clone() + }), + ), + location_id::set(Some(*location_id)), ), - location_id::set(Some(*location_id)), - ), - sync_db_entry!(materialized_path.to_string(), materialized_path), - sync_db_entry!(name.to_string(), name), - sync_db_entry!(is_dir, is_dir), - sync_db_entry!(extension.to_string(), extension), - sync_db_entry!( - entry.metadata.size_in_bytes.to_be_bytes().to_vec(), - size_in_bytes_bytes - ), - sync_db_entry!(inode_to_db(entry.metadata.inode), inode), - { - let v = entry.metadata.created_at.into(); - sync_db_entry!(v, date_created) - }, - { - let v = entry.metadata.modified_at.into(); - sync_db_entry!(v, date_modified) - }, - { - let v = Utc::now().into(); - sync_db_entry!(v, date_indexed) - }, - sync_db_entry!(entry.metadata.hidden, hidden), - ] - .into_iter() - .unzip(); - - ( - sync.shared_create( - prisma_sync::file_path::SyncId { - pub_id: sd_utils::uuid_to_bytes(entry.pub_id), + sync_db_entry!(materialized_path.to_string(), materialized_path), + sync_db_entry!(name.to_string(), name), + sync_db_entry!(is_dir, is_dir), + sync_db_entry!(extension.to_string(), extension), + sync_db_entry!( + metadata.size_in_bytes.to_be_bytes().to_vec(), + size_in_bytes_bytes + ), + sync_db_entry!(inode_to_db(metadata.inode), inode), + { + let v = metadata.created_at.into(); + sync_db_entry!(v, date_created) }, - sync_params, - ), - create_unchecked(pub_id, db_params), - ) - }) + { + let v = metadata.modified_at.into(); + sync_db_entry!(v, date_modified) + }, + { + let v = Utc::now().into(); + sync_db_entry!(v, date_indexed) + }, + sync_db_entry!(metadata.hidden, hidden), + ] + .into_iter() + .unzip(); + + ( + sync.shared_create( + prisma_sync::file_path::SyncId { + pub_id: pub_id.to_db(), + }, + sync_params, + ), + create_unchecked(pub_id.into(), db_params), + ) + }, + ) .unzip(); #[allow(clippy::cast_sign_loss)] diff --git a/core/crates/heavy-lifting/src/indexer/tasks/updater.rs b/core/crates/heavy-lifting/src/indexer/tasks/updater.rs index e547ec8ac7b8..68041fb72a23 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/updater.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/updater.rs @@ -158,62 +158,69 @@ impl Task for UpdateTask { let (sync_stuff, paths_to_update) = walked_entries .drain(..) - .map(|entry| { - let IsolatedFilePathDataParts { is_dir, .. 
} = &entry.iso_file_path.to_parts(); - - let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); - - let should_unlink_object = entry.maybe_object_id.map_or(false, |object_id| { - object_ids_that_should_be_unlinked.contains(&object_id) - }); - - let (sync_params, db_params) = chain_optional_iter( - [ - ((cas_id::NAME, msgpack!(nil)), cas_id::set(None)), - sync_db_entry!(*is_dir, is_dir), - sync_db_entry!( - entry.metadata.size_in_bytes.to_be_bytes().to_vec(), - size_in_bytes_bytes - ), - sync_db_entry!(inode_to_db(entry.metadata.inode), inode), - { - let v = entry.metadata.created_at.into(); - sync_db_entry!(v, date_created) - }, - { - let v = entry.metadata.modified_at.into(); - sync_db_entry!(v, date_modified) - }, - sync_db_entry!(entry.metadata.hidden, hidden), - ], - [ - // As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null - // So this file_path will be updated at file identifier job - should_unlink_object - .then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())), - ], - ) - .into_iter() - .unzip::<_, _, Vec<_>, Vec<_>>(); - - ( - sync_params - .into_iter() - .map(|(field, value)| { - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: pub_id.clone(), - }, - field, - value, - ) - }) - .collect::>(), - db.file_path() - .update(file_path::pub_id::equals(pub_id), db_params) - .select(file_path::select!({ id })), - ) - }) + .map( + |WalkedEntry { + pub_id, + maybe_object_id, + iso_file_path, + metadata, + }| { + let IsolatedFilePathDataParts { is_dir, .. } = &iso_file_path.to_parts(); + + let should_unlink_object = maybe_object_id.map_or(false, |object_id| { + object_ids_that_should_be_unlinked.contains(&object_id) + }); + + let (sync_params, db_params) = chain_optional_iter( + [ + ((cas_id::NAME, msgpack!(nil)), cas_id::set(None)), + sync_db_entry!(*is_dir, is_dir), + sync_db_entry!( + metadata.size_in_bytes.to_be_bytes().to_vec(), + size_in_bytes_bytes + ), + sync_db_entry!(inode_to_db(metadata.inode), inode), + { + let v = metadata.created_at.into(); + sync_db_entry!(v, date_created) + }, + { + let v = metadata.modified_at.into(); + sync_db_entry!(v, date_modified) + }, + sync_db_entry!(metadata.hidden, hidden), + ], + [ + // As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null + // So this file_path will be updated at file identifier job + should_unlink_object.then_some(( + (object_id::NAME, msgpack!(nil)), + object::disconnect(), + )), + ], + ) + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(); + + ( + sync_params + .into_iter() + .map(|(field, value)| { + sync.shared_update( + prisma_sync::file_path::SyncId { + pub_id: pub_id.to_db(), + }, + field, + value, + ) + }) + .collect::>(), + db.file_path() + .update(file_path::pub_id::equals(pub_id.into()), db_params) + .select(file_path::select!({ id })), + ) + }, + ) .unzip::<_, _, Vec<_>, Vec<_>>(); let updated = sync diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs index f12baf8c21b2..4e460f972f98 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/entry.rs @@ -1,5 +1,6 @@ use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData}; +use sd_core_prisma_helpers::FilePathPubId; use sd_prisma::prisma::file_path; use std::{ @@ -8,12 +9,11 @@ use std::{ }; use serde::{Deserialize, Serialize}; -use uuid::Uuid; /// `WalkedEntry` represents a single path in the 
filesystem #[derive(Debug, Serialize, Deserialize)] pub struct WalkedEntry { - pub pub_id: Uuid, + pub pub_id: FilePathPubId, pub maybe_object_id: file_path::object_id::Type, pub iso_file_path: IsolatedFilePathData<'static>, pub metadata: FilePathMetadata, @@ -47,7 +47,7 @@ impl From for WalkedEntry { }: WalkingEntry, ) -> Self { Self { - pub_id: Uuid::new_v4(), + pub_id: FilePathPubId::new(), maybe_object_id: None, iso_file_path, metadata, @@ -55,7 +55,9 @@ impl From for WalkedEntry { } } -impl From<(Uuid, file_path::object_id::Type, WalkingEntry)> for WalkedEntry { +impl> From<(PubId, file_path::object_id::Type, WalkingEntry)> + for WalkedEntry +{ fn from( ( pub_id, @@ -64,10 +66,10 @@ impl From<(Uuid, file_path::object_id::Type, WalkingEntry)> for WalkedEntry { iso_file_path, metadata, }, - ): (Uuid, file_path::object_id::Type, WalkingEntry), + ): (PubId, file_path::object_id::Type, WalkingEntry), ) -> Self { Self { - pub_id, + pub_id: pub_id.into(), maybe_object_id, iso_file_path, metadata, diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs index b74ba5270677..bc777de76f1f 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs @@ -524,7 +524,7 @@ async fn segregate_creates_and_updates( .unwrap_or_default() ) { to_update.push( - WalkedEntry::from((sd_utils::from_bytes_to_uuid(&file_path.pub_id), file_path.object_id, entry)), + WalkedEntry::from((&file_path.pub_id, file_path.object_id, entry)), ); } } @@ -647,18 +647,18 @@ async fn gather_file_paths_to_remove( mod tests { use super::*; - use futures::stream::FuturesUnordered; use sd_core_indexer_rules::{IndexerRule, RulePerKind}; + use sd_core_prisma_helpers::FilePathPubId; use sd_task_system::{TaskOutput, TaskStatus, TaskSystem}; use chrono::Utc; + use futures::stream::FuturesUnordered; use globset::{Glob, GlobSetBuilder}; use lending_stream::{LendingStream, StreamExt}; use tempfile::{tempdir, TempDir}; use tokio::{fs, io::AsyncWriteExt}; use tracing::debug; use tracing_test::traced_test; - use uuid::Uuid; #[derive(Debug, Clone)] struct DummyIsoPathFactory { @@ -912,33 +912,33 @@ mod tests { }; let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); + let pub_id = FilePathPubId::new(); let maybe_object_id = None; #[rustfmt::skip] let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, 
iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: 
f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/text.txt"), false), metadata }, ] .into_iter() @@ -962,14 +962,14 @@ mod tests { }; let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); + let pub_id = FilePathPubId::new(); let maybe_object_id = None; #[rustfmt::skip] let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, ] .into_iter() @@ -1008,28 +1008,28 @@ mod tests { }; let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); + let pub_id = FilePathPubId::new(); let maybe_object_id = None; #[rustfmt::skip] let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), 
metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, ] .into_iter() @@ -1064,25 +1064,25 @@ mod tests { }; let f = |path, is_dir| 
IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); + let pub_id = FilePathPubId::new(); let maybe_object_id = None; #[rustfmt::skip] let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/.gitignore"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/partial/readme"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.gitignore"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: 
f(root_path.join("inner/node_project/package.json"), false), metadata }, + WalkedEntry { pub_id: pub_id.clone(), maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, ] .into_iter() diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index 4e153756f5c6..05704f3d0997 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -25,7 +25,7 @@ use tokio::{ time::{interval_at, Instant}, }; use tokio_stream::wrappers::IntervalStream; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, instrument, trace, warn}; use uuid::Uuid; use super::{ @@ -256,6 +256,7 @@ impl> JobSystemRunner>( match msg { // Job return status messages StreamMessage::ReturnStatus((job_id, status)) => { - trace!("Received return status for job: "); if let Err(e) = runner.process_return_status(job_id, status).await { - error!("Failed to process return status: {e:#?}"); + error!(?e, "Failed to process return status"); } } @@ -611,12 +611,11 @@ pub(super) async fn run>( serialized_tasks, ack_tx, }) => { + let res = runner + .new_job(id, location_id, dyn_job, ctx, serialized_tasks) + .await; ack_tx - .send( - runner - .new_job(id, location_id, dyn_job, ctx, serialized_tasks) - .await, - ) + .send(res) .expect("ack channel closed before sending resume job response"); } diff --git a/core/crates/heavy-lifting/src/job_system/store.rs b/core/crates/heavy-lifting/src/job_system/store.rs index 3a0d5a833322..5ba68f84a2d6 100644 --- a/core/crates/heavy-lifting/src/job_system/store.rs +++ b/core/crates/heavy-lifting/src/job_system/store.rs @@ -81,7 +81,7 @@ pub async fn load_jobs>( .. }| { iter::once(*id).chain(next_jobs.iter().map(|StoredJob { id, .. 
}| *id)) }, ) - .map(uuid_to_bytes) + .map(|job_id| uuid_to_bytes(&job_id)) .collect::>(), )]) .exec() diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs index 3cc9de27ed40..314eec128b85 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs @@ -120,7 +120,7 @@ pub async fn save( sync.shared_create( prisma_sync::exif_data::SyncId { object: prisma_sync::object::SyncId { - pub_id: uuid_to_bytes(object_pub_id), + pub_id: uuid_to_bytes(&object_pub_id), }, }, sync_params, diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs index 66ecc6a155f0..9bd1f5d3de55 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs @@ -155,6 +155,7 @@ impl Task for MediaDataExtractor { false } + #[allow(clippy::too_many_lines)] async fn run(&mut self, interrupter: &Interrupter) -> Result { loop { match &mut self.stage { @@ -174,8 +175,7 @@ impl Task for MediaDataExtractor { Stage::FetchedObjectsAlreadyWithMediaData(objects_already_with_media_data) => { let filtering_start = Instant::now(); if self.file_paths.len() == objects_already_with_media_data.len() { - // All files already have media data, skipping - self.output.skipped = self.file_paths.len() as u64; + self.output.skipped = self.file_paths.len() as u64; // Files already have media data, skipping break; } diff --git a/core/crates/indexer-rules/src/lib.rs b/core/crates/indexer-rules/src/lib.rs index 4f4b6c9fb787..e30b5e506b10 100644 --- a/core/crates/indexer-rules/src/lib.rs +++ b/core/crates/indexer-rules/src/lib.rs @@ -51,7 +51,7 @@ use rspc::ErrorCode; use specta::Type; use thiserror::Error; -use tokio::{fs, sync::RwLock}; +use tokio::fs; use tracing::debug; use uuid::Uuid; diff --git a/core/crates/prisma-helpers/src/lib.rs b/core/crates/prisma-helpers/src/lib.rs index ad8c4154914b..6ac6d97304da 100644 --- a/core/crates/prisma-helpers/src/lib.rs +++ b/core/crates/prisma-helpers/src/lib.rs @@ -37,6 +37,7 @@ use serde::{Deserialize, Serialize}; use uuid::Uuid; // File Path selectables! 
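The store.rs and exif_media_data.rs hunks above switch their call sites to pass the `Uuid` by reference. A minimal sketch of the assumed `sd_utils::uuid_to_bytes` shape after this change; the helper itself is not shown in this patch, so the body below is illustrative only:

use uuid::Uuid;

// Assumed post-change signature: the id is borrowed instead of taken by value,
// matching call sites such as `.map(|job_id| uuid_to_bytes(&job_id))` above.
pub fn uuid_to_bytes(uuid: &Uuid) -> Vec<u8> {
    uuid.as_bytes().to_vec()
}

fn demo() {
    let id = Uuid::new_v4();
    let bytes = uuid_to_bytes(&id);
    // 16 bytes, suitable for the `pub_id` byte columns used throughout these hunks.
    assert_eq!(bytes.len(), 16);
}
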
+file_path::select!(file_path_id { id }); file_path::select!(file_path_pub_id { pub_id }); file_path::select!(file_path_pub_and_cas_ids { id pub_id cas_id }); file_path::select!(file_path_just_pub_id_materialized_path { @@ -146,6 +147,7 @@ file_path::select!(file_path_to_full_path { } }); file_path::select!(file_path_to_create_object { + id pub_id date_created }); @@ -302,14 +304,14 @@ label::include!((take: i64) => label_with_objects { pub struct CasId(String); impl From for file_path::cas_id::Type { - fn from(cas_id: CasId) -> Self { - Some(cas_id.0) + fn from(CasId(cas_id): CasId) -> Self { + Some(cas_id) } } impl From<&CasId> for file_path::cas_id::Type { - fn from(cas_id: &CasId) -> Self { - Some(cas_id.0.clone()) + fn from(CasId(cas_id): &CasId) -> Self { + Some(cas_id.clone()) } } @@ -319,12 +321,30 @@ impl From<&str> for CasId { } } +impl From<&String> for CasId { + fn from(cas_id: &String) -> Self { + Self(cas_id.clone()) + } +} + impl From for CasId { fn from(cas_id: String) -> Self { Self(cas_id) } } +impl From for String { + fn from(CasId(cas_id): CasId) -> Self { + cas_id + } +} + +impl From<&CasId> for String { + fn from(CasId(cas_id): &CasId) -> Self { + cas_id.clone() + } +} + #[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)] #[serde(transparent)] #[repr(transparent)] @@ -372,6 +392,12 @@ impl From> for PubId { } } +impl From<&Vec> for PubId { + fn from(bytes: &Vec) -> Self { + Self::Vec(bytes.clone()) + } +} + impl From<&[u8]> for PubId { fn from(bytes: &[u8]) -> Self { Self::Vec(bytes.to_vec()) @@ -420,6 +446,12 @@ macro_rules! delegate_pub_id { } } + impl From<&Vec> for $type_name { + fn from(bytes: &Vec) -> Self { + Self(bytes.into()) + } + } + impl From<&[u8]> for $type_name { fn from(bytes: &[u8]) -> Self { Self(bytes.into()) diff --git a/core/crates/sync/tests/lib.rs b/core/crates/sync/tests/lib.rs index fa5ee0e2f727..03555953f5e6 100644 --- a/core/crates/sync/tests/lib.rs +++ b/core/crates/sync/tests/lib.rs @@ -30,11 +30,11 @@ async fn write_test_location( ( instance.sync.shared_create( prisma_sync::location::SyncId { - pub_id: uuid_to_bytes(id), + pub_id: uuid_to_bytes(&id), }, sync_ops, ), - instance.db.location().create(uuid_to_bytes(id), db_ops), + instance.db.location().create(uuid_to_bytes(&id), db_ops), ) }) .await?) 
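The prisma-helpers hunk above adds borrowing conversions between `CasId` and `String`. A small self-contained sketch of how they read in practice; the `CasId` below is a simplified stand-in for the crate's type (the serde attributes and extra derives are omitted):

// Simplified stand-in for `sd_core_prisma_helpers::CasId`, for illustration only.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct CasId(String);

// Mirrors the impls added in the hunk above.
impl From<&String> for CasId {
    fn from(cas_id: &String) -> Self {
        Self(cas_id.clone())
    }
}

impl From<&CasId> for String {
    fn from(CasId(cas_id): &CasId) -> Self {
        cas_id.clone()
    }
}

// Borrowing conversions let callers build `CasId`s without consuming the source
// strings, and turn them back into `String`s (e.g. for db query parameters).
fn demo(raw_cas_ids: &[String]) -> Vec<String> {
    raw_cas_ids
        .iter()
        .map(CasId::from)
        .map(|cas_id| String::from(&cas_id))
        .collect()
}
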
diff --git a/core/crates/sync/tests/mock_instance.rs b/core/crates/sync/tests/mock_instance.rs index 84b2e4de29b3..695c86bb615a 100644 --- a/core/crates/sync/tests/mock_instance.rs +++ b/core/crates/sync/tests/mock_instance.rs @@ -36,7 +36,7 @@ impl Instance { db.instance() .create( - uuid_to_bytes(id), + uuid_to_bytes(&id), vec![], vec![], Utc::now().into(), @@ -73,7 +73,7 @@ impl Instance { left.db .instance() .create( - uuid_to_bytes(right.id), + uuid_to_bytes(&right.id), vec![], vec![], Utc::now().into(), diff --git a/core/src/api/cloud.rs b/core/src/api/cloud.rs index d13dc6a060ce..2ad606821d8b 100644 --- a/core/src/api/cloud.rs +++ b/core/src/api/cloud.rs @@ -149,9 +149,9 @@ mod library { &library.db, &library.sync, &node.libraries, - instance.uuid, + &instance.uuid, instance.identity, - instance.node_id, + &instance.node_id, node.p2p.peer_metadata(), ) .await?; diff --git a/core/src/api/tags.rs b/core/src/api/tags.rs index 73c9c90cab02..9ac20989e5ab 100644 --- a/core/src/api/tags.rs +++ b/core/src/api/tags.rs @@ -205,7 +205,7 @@ pub(crate) fn mount() -> AlphaRouter { .iter() .filter(|fp| fp.is_dir.unwrap_or_default() && fp.object.is_none()) .map(|fp| { - let id = uuid_to_bytes(Uuid::new_v4()); + let id = uuid_to_bytes(&Uuid::new_v4()); sync_params.extend(sync.shared_create( prisma_sync::object::SyncId { pub_id: id.clone() }, diff --git a/core/src/cloud/sync/receive.rs b/core/src/cloud/sync/receive.rs index b148f154aaa8..fb2ed9380233 100644 --- a/core/src/cloud/sync/receive.rs +++ b/core/src/cloud/sync/receive.rs @@ -55,7 +55,7 @@ pub async fn run_actor( .map(|id| { db.cloud_crdt_operation() .find_first(vec![cloud_crdt_operation::instance::is(vec![ - instance::pub_id::equals(uuid_to_bytes(*id)), + instance::pub_id::equals(uuid_to_bytes(id)), ])]) .order_by(cloud_crdt_operation::timestamp::order( SortOrder::Desc, @@ -164,9 +164,9 @@ pub async fn run_actor( &db, &sync, &libraries, - collection.instance_uuid, + &collection.instance_uuid, instance.identity, - instance.node_id, + &instance.node_id, node.p2p.peer_metadata(), ) .await @@ -244,9 +244,9 @@ pub async fn upsert_instance( db: &PrismaClient, sync: &sd_core_sync::Manager, libraries: &Libraries, - uuid: Uuid, + uuid: &Uuid, identity: RemoteIdentity, - node_id: Uuid, + node_id: &Uuid, metadata: HashMap, ) -> prisma_client_rust::Result<()> { db.instance() @@ -267,7 +267,7 @@ pub async fn upsert_instance( .exec() .await?; - sync.timestamps.write().await.entry(uuid).or_default(); + sync.timestamps.write().await.entry(*uuid).or_default(); // Called again so the new instances are picked up libraries.update_instances_by_id(library_id).await; diff --git a/core/src/library/config.rs b/core/src/library/config.rs index 4a7bad016105..eec1cbcf2b9b 100644 --- a/core/src/library/config.rs +++ b/core/src/library/config.rs @@ -129,7 +129,7 @@ impl LibraryConfig { db.indexer_rule().update_many( vec![indexer_rule::name::equals(Some(name))], vec![indexer_rule::pub_id::set(sd_utils::uuid_to_bytes( - Uuid::from_u128(i as u128), + &Uuid::from_u128(i as u128), ))], ) }) diff --git a/core/src/library/manager/mod.rs b/core/src/library/manager/mod.rs index 49d3bc920240..35c2026ff43a 100644 --- a/core/src/library/manager/mod.rs +++ b/core/src/library/manager/mod.rs @@ -627,9 +627,9 @@ impl Libraries { &library.db, &library.sync, &node.libraries, - instance.uuid, + &instance.uuid, instance.identity, - instance.node_id, + &instance.node_id, instance.metadata, ) .await diff --git a/core/src/location/manager/watcher/utils.rs 
b/core/src/location/manager/watcher/utils.rs index 890b489bbf2a..4b141f7d5fce 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -264,7 +264,7 @@ async fn inner_create_file( } = if let Some(object) = existing_object { object } else { - let pub_id = uuid_to_bytes(Uuid::new_v4()); + let pub_id = uuid_to_bytes(&Uuid::new_v4()); let date_created: DateTime = DateTime::::from(fs_metadata.created_or_now()).into(); let int_kind = kind as i32; @@ -618,7 +618,7 @@ async fn inner_update_file( .await?; } } else { - let pub_id = uuid_to_bytes(Uuid::new_v4()); + let pub_id = uuid_to_bytes(&Uuid::new_v4()); let date_created: DateTime = DateTime::::from(fs_metadata.created_or_now()).into(); diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index 68201dd7a336..f3086ca00f8e 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -647,7 +647,7 @@ pub async fn relink_location( metadata.relink(*id, location_path).await?; - let pub_id = uuid_to_bytes(metadata.location_pub_id(*id)?); + let pub_id = uuid_to_bytes(&metadata.location_pub_id(*id)?); let path = location_path .to_str() .map(str::to_string) @@ -1168,7 +1168,7 @@ pub async fn create_file_path( .unzip() }; - let pub_id = sd_utils::uuid_to_bytes(Uuid::new_v4()); + let pub_id = sd_utils::uuid_to_bytes(&Uuid::new_v4()); let created_path = sync .write_ops( From f48eb1f7d2711c00e1c28f111eb011b45e34332b Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 28 May 2024 02:52:42 -0300 Subject: [PATCH 21/33] A bunch of polishing --- .../heavy-lifting/src/file_identifier/job.rs | 99 +++++--- .../heavy-lifting/src/file_identifier/mod.rs | 18 +- .../src/file_identifier/shallow.rs | 33 +-- .../src/file_identifier/tasks/identifier.rs | 19 +- .../src/file_identifier/tasks/mod.rs | 9 +- .../file_identifier/tasks/object_processor.rs | 37 ++- core/crates/heavy-lifting/src/indexer/job.rs | 188 ++++++++------ core/crates/heavy-lifting/src/indexer/mod.rs | 20 +- .../heavy-lifting/src/indexer/shallow.rs | 38 ++- .../heavy-lifting/src/indexer/tasks/mod.rs | 4 + .../heavy-lifting/src/indexer/tasks/saver.rs | 233 ++++++++++-------- .../src/indexer/tasks/updater.rs | 216 +++++++++------- .../src/indexer/tasks/walker/mod.rs | 169 ++++++++----- .../src/indexer/tasks/walker/save_state.rs | 13 +- .../heavy-lifting/src/job_system/job.rs | 136 ++++++---- .../heavy-lifting/src/job_system/mod.rs | 6 +- .../heavy-lifting/src/job_system/report.rs | 5 +- .../heavy-lifting/src/job_system/runner.rs | 41 +-- crates/task-system/src/system.rs | 2 +- 19 files changed, 794 insertions(+), 492 deletions(-) diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 0f195f3b0a0e..edb2240edb04 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -36,7 +36,7 @@ use futures_concurrency::future::TryJoin; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::{debug, error, instrument, trace, warn}; +use tracing::{debug, error, instrument, trace, warn, Level}; use super::{ accumulate_file_paths_by_cas_id, dispatch_object_processor_tasks, orphan_path_filters_deep, @@ -145,6 +145,16 @@ impl Job for FileIdentifier { Ok(()) } + #[instrument( + skip_all, + fields( + location_id = self.location.id, + location_path = %self.location_path.display(), + sub_path = ?self.sub_path.as_ref().map(|path| path.display()), + ), + ret(level = 
Level::TRACE), + err, + )] async fn run( mut self, dispatcher: JobTaskDispatcher, @@ -165,7 +175,7 @@ impl Job for FileIdentifier { } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { - warn!("Task returned an empty output"); + warn!(%task_id, "Task returned an empty output"); } Ok(TaskStatus::Shutdown(task)) => { @@ -300,7 +310,7 @@ impl FileIdentifier { .await?; ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_identifier_tasks), + ProgressUpdate::TaskCount(u64::from(self.metadata.total_identifier_tasks)), ProgressUpdate::Message(format!( "{} files to be identified", self.metadata.total_found_orphans @@ -312,9 +322,9 @@ impl FileIdentifier { } else { ctx.progress(vec![ ProgressUpdate::TaskCount(if matches!(self.phase, Phase::IdentifyingFiles) { - self.metadata.total_identifier_tasks + u64::from(self.metadata.total_identifier_tasks) } else { - self.metadata.total_object_processor_tasks + u64::from(self.metadata.total_object_processor_tasks) }), ProgressUpdate::Message(format!( "{} files to be identified", @@ -322,6 +332,10 @@ impl FileIdentifier { )), ]) .await; + debug!( + resuming_tasks_count = self.pending_tasks_on_resume.len(), + "Resuming tasks for FileIdentifier job", + ); pending_running_tasks.extend(mem::take(&mut self.pending_tasks_on_resume)); } @@ -393,7 +407,7 @@ impl FileIdentifier { ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Vec> { - self.metadata.extract_metadata_time += extract_metadata_time; + self.metadata.mean_extract_metadata_time += extract_metadata_time; self.metadata.mean_save_db_time_on_identifier_tasks += save_db_time; self.metadata.created_objects_count += created_objects_count; @@ -418,7 +432,7 @@ impl FileIdentifier { self.metadata.completed_identifier_tasks += 1; ctx.progress(vec![ - ProgressUpdate::CompletedTaskCount(self.metadata.completed_identifier_tasks), + ProgressUpdate::CompletedTaskCount(u64::from(self.metadata.completed_identifier_tasks)), ProgressUpdate::Message(format!( "Identified {total_identified_files} of {} files", self.metadata.total_found_orphans @@ -441,10 +455,14 @@ impl FileIdentifier { ) .await; - self.metadata.total_object_processor_tasks = tasks.len() as u64; + #[allow(clippy::cast_possible_truncation)] + { + // SAFETY: we know that `tasks.len()` is a valid u32 as we wouldn't dispatch more than `u32::MAX` tasks + self.metadata.total_object_processor_tasks = tasks.len() as u32; + } ctx.progress(vec![ - ProgressUpdate::TaskCount(self.metadata.total_object_processor_tasks), + ProgressUpdate::TaskCount(u64::from(self.metadata.total_object_processor_tasks)), ProgressUpdate::CompletedTaskCount(0), ProgressUpdate::phase(self.phase), ]) @@ -470,16 +488,18 @@ impl FileIdentifier { }: object_processor::Output, ctx: &impl JobContext, ) { - self.metadata.fetch_existing_objects_time += fetch_existing_objects_time; - self.metadata.assign_to_existing_object_time += assign_to_existing_object_time; - self.metadata.create_object_time += create_object_time; + self.metadata.mean_fetch_existing_objects_time += fetch_existing_objects_time; + self.metadata.mean_assign_to_existing_object_time += assign_to_existing_object_time; + self.metadata.mean_create_object_time += create_object_time; self.metadata.created_objects_count += created_objects_count; self.metadata.linked_objects_count += linked_objects_count; self.metadata.completed_object_processor_tasks += 1; ctx.progress(vec![ - ProgressUpdate::CompletedTaskCount(self.metadata.completed_object_processor_tasks), + ProgressUpdate::CompletedTaskCount(u64::from( + 
self.metadata.completed_object_processor_tasks, + )), ProgressUpdate::Message(format!( "Processed {} of {} objects", self.metadata.created_objects_count + self.metadata.linked_objects_count, @@ -533,7 +553,7 @@ impl FileIdentifier { .exec() .await?; - trace!("Found {} orphan paths", orphan_paths.len()); + trace!(orphans_count = orphan_paths.len(), "Found orphan paths"); if orphan_paths.is_empty() { break; @@ -659,29 +679,29 @@ struct SaveState { #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct Metadata { - extract_metadata_time: Duration, + mean_extract_metadata_time: Duration, mean_save_db_time_on_identifier_tasks: Duration, - fetch_existing_objects_time: Duration, - assign_to_existing_object_time: Duration, - create_object_time: Duration, + mean_fetch_existing_objects_time: Duration, + mean_assign_to_existing_object_time: Duration, + mean_create_object_time: Duration, seeking_orphans_time: Duration, total_found_orphans: u64, created_objects_count: u64, linked_objects_count: u64, - total_identifier_tasks: u64, - completed_identifier_tasks: u64, - total_object_processor_tasks: u64, - completed_object_processor_tasks: u64, + total_identifier_tasks: u32, + completed_identifier_tasks: u32, + total_object_processor_tasks: u32, + completed_object_processor_tasks: u32, } impl From for Vec { fn from( Metadata { - extract_metadata_time, - mean_save_db_time_on_identifier_tasks, - fetch_existing_objects_time, - assign_to_existing_object_time, - create_object_time, + mut mean_extract_metadata_time, + mut mean_save_db_time_on_identifier_tasks, + mut mean_fetch_existing_objects_time, + mut mean_assign_to_existing_object_time, + mut mean_create_object_time, seeking_orphans_time, total_found_orphans, created_objects_count, @@ -692,6 +712,13 @@ impl From for Vec { completed_object_processor_tasks, }: Metadata, ) -> Self { + mean_extract_metadata_time /= total_identifier_tasks; + mean_save_db_time_on_identifier_tasks /= total_identifier_tasks; + + mean_fetch_existing_objects_time /= total_object_processor_tasks; + mean_assign_to_existing_object_time /= total_object_processor_tasks; + mean_create_object_time /= total_object_processor_tasks; + vec![ ReportOutputMetadata::FileIdentifier { total_orphan_paths: u64_to_frontend(total_found_orphans), @@ -699,20 +726,26 @@ impl From for Vec { total_objects_linked: u64_to_frontend(linked_objects_count), }, ReportOutputMetadata::Metrics(HashMap::from([ - ("extract_metadata_time".into(), json!(extract_metadata_time)), + ( + "mean_extract_metadata_time".into(), + json!(mean_extract_metadata_time), + ), ( "mean_save_db_time_on_identifier_tasks".into(), json!(mean_save_db_time_on_identifier_tasks), ), ( - "fetch_existing_objects_time".into(), - json!(fetch_existing_objects_time), + "mean_fetch_existing_objects_time".into(), + json!(mean_fetch_existing_objects_time), + ), + ( + "mean_assign_to_existing_object_time".into(), + json!(mean_assign_to_existing_object_time), ), ( - "assign_to_existing_object_time".into(), - json!(assign_to_existing_object_time), + "mean_create_object_time".into(), + json!(mean_create_object_time), ), - ("create_object_time".into(), json!(create_object_time)), ("seeking_orphans_time".into(), json!(seeking_orphans_time)), ("total_found_orphans".into(), json!(total_found_orphans)), ("created_objects_count".into(), json!(created_objects_count)), diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index b51c413555ab..bcdb365e964f 100644 --- 
a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -98,10 +98,14 @@ impl FileMetadata { .await .map_err(|e| FileIOError::from((&path, e)))?; - assert!( - !fs_metadata.is_dir(), - "We can't generate cas_id for directories" - ); + if fs_metadata.is_dir() { + trace!(path = %path.display(), "Skipping directory"); + return Ok(Self { + cas_id: None, + kind: ObjectKind::Folder, + fs_metadata, + }); + } // derive Object kind let kind = Extension::resolve_conflicting(&path, false) @@ -119,8 +123,10 @@ impl FileMetadata { }; trace!( - "Analyzed file: ", - path.display() + path = %path.display(), + ?cas_id, + %kind, + "Analyzed file", ); Ok(Self { diff --git a/core/crates/heavy-lifting/src/file_identifier/shallow.rs b/core/crates/heavy-lifting/src/file_identifier/shallow.rs index 9543b5f0a17b..8bccaef4fa5a 100644 --- a/core/crates/heavy-lifting/src/file_identifier/shallow.rs +++ b/core/crates/heavy-lifting/src/file_identifier/shallow.rs @@ -1,6 +1,6 @@ use crate::{ file_identifier, utils::sub_path::maybe_get_iso_file_path_from_sub_path, Error, - NonCriticalError, OuterContext, + NonCriticalError, OuterContext, UpdateEvent, }; use sd_core_file_path_helper::IsolatedFilePathData; @@ -19,7 +19,7 @@ use std::{ }; use futures::{stream::FuturesUnordered, StreamExt}; -use tracing::{debug, warn}; +use tracing::{debug, instrument, trace, warn}; use super::{ accumulate_file_paths_by_cas_id, dispatch_object_processor_tasks, orphan_path_filters_shallow, @@ -27,6 +27,15 @@ use super::{ CHUNK_SIZE, }; +#[instrument( + skip_all, + fields( + location_id = location.id, + location_path = ?location.path, + sub_path = %sub_path.as_ref().display() + ) + err, +)] pub async fn shallow( location: location::Data, sub_path: impl AsRef + Send, @@ -100,17 +109,11 @@ pub async fn shallow( } if orphans_count == 0 { - debug!( - "No orphans found on ", - location.id, - sub_path.display() - ); + trace!("No orphans found"); return Ok(vec![]); } - let errors = process_tasks(identifier_tasks, dispatcher, ctx).await?; - - Ok(errors) + process_tasks(identifier_tasks, dispatcher, ctx).await } async fn process_tasks( @@ -145,7 +148,7 @@ async fn process_tasks( completed_identifier_tasks += 1; - ctx.report_update(crate::UpdateEvent::NewIdentifiedObjects { + ctx.report_update(UpdateEvent::NewIdentifiedObjects { file_path_ids: file_path_ids_with_new_object, }); @@ -175,21 +178,21 @@ async fn process_tasks( .. 
} = *any_task_output.downcast().expect("just checked"); - ctx.report_update(crate::UpdateEvent::NewIdentifiedObjects { + ctx.report_update(UpdateEvent::NewIdentifiedObjects { file_path_ids: file_path_ids_with_new_object, }); } } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { - warn!("Task returned an empty output"); + warn!(%task_id, "Task returned an empty output"); } Ok(TaskStatus::Shutdown(_)) => { debug!( "Spacedrive is shutting down while a shallow file identifier was in progress" ); - return Ok(vec![]); + return Ok(errors); } Ok(TaskStatus::Error(e)) => { @@ -198,7 +201,7 @@ async fn process_tasks( Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => { warn!("Task was cancelled or aborted on shallow file identifier"); - return Ok(vec![]); + return Ok(errors); } Err(e) => { diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs index 2154efda64e2..b7bc9f2177c9 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs @@ -58,7 +58,7 @@ pub struct Identifier { id: TaskId, with_priority: bool, - // Received args + // Received input args location: Arc, location_path: Arc, file_paths_by_id: HashMap, @@ -67,7 +67,7 @@ pub struct Identifier { identified_files: HashMap, file_paths_without_cas_id: Vec, - // Out + // Out collector output: Output, // Dependencies @@ -75,14 +75,29 @@ pub struct Identifier { sync: Arc, } +/// Output from the `[Identifier]` task #[derive(Debug, Default, Serialize, Deserialize)] pub struct Output { + /// To send to frontend for priority reporting of new objects pub file_path_ids_with_new_object: Vec, + + /// Files that need to be aggregate between many identifier tasks to be processed by the + /// object processor tasks pub file_paths_by_cas_id: HashMap>, + + /// Collected metric about time elapsed extracting metadata from file system pub extract_metadata_time: Duration, + + /// Collected metric about time spent saving objects on disk pub save_db_time: Duration, + + /// Total number of objects already created as they didn't have `cas_id`, like directories or empty files pub created_objects_count: u64, + + /// Total number of files that we were able to identify pub total_identified_files: u64, + + /// Non critical errors that happened during the task execution pub errors: Vec, } diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs index 8f3f65216f01..be0694bddf12 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs @@ -22,6 +22,7 @@ pub mod object_processor; pub use identifier::Identifier; pub use object_processor::ObjectProcessor; +/// This object has all needed data to create a new `object` for a `file_path` or link an existing one. 
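These are the values that end up grouped by `cas_id` in the identifier output documented above and merged across tasks before the object-processor tasks run. A rough sketch of that accumulation step, using plain stand-in types instead of the crate's `CasId` and `FilePathToCreateOrLinkObject` (the real `accumulate_file_paths_by_cas_id` helper may be shaped differently):

use std::collections::HashMap;

// Stand-ins for the crate's `CasId` key and `FilePathToCreateOrLinkObject` value.
type CasId = String;
type FilePathToLink = i32;

// Folds one identifier task's `file_paths_by_cas_id` output into the job-wide
// map, so all file paths sharing a cas_id can later be linked to (or create)
// a single object.
fn accumulate_file_paths_by_cas_id(
    task_output: HashMap<CasId, Vec<FilePathToLink>>,
    accumulator: &mut HashMap<CasId, Vec<FilePathToLink>>,
) {
    for (cas_id, file_paths) in task_output {
        accumulator.entry(cas_id).or_default().extend(file_paths);
    }
}
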
#[derive(Debug, Serialize, Deserialize)] pub(super) struct FilePathToCreateOrLinkObject { id: file_path::id::Type, @@ -67,8 +68,7 @@ async fn create_objects_and_update_file_paths( db: &PrismaClient, sync: &SyncManager, ) -> Result, file_identifier::Error> { - trace!("Creating new Objects!"); - + trace!("Preparing objects"); let (object_create_args, file_path_update_args) = files_and_kinds .into_iter() .map( @@ -111,6 +111,11 @@ async fn create_objects_and_update_file_paths( ) .unzip::<_, _, Vec<_>, Vec<_>>(); + trace!( + new_objects_count = object_create_args.len(), + "Creating new Objects!", + ); + // create new object records with assembled values let created_objects_count = sync .write_ops(db, { diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs index 8e412d3475d7..139bb30273e3 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs @@ -20,15 +20,20 @@ use super::{ #[derive(Debug)] pub struct ObjectProcessor { + // Task control id: TaskId, + with_priority: bool, + + // Received input args file_paths_by_cas_id: HashMap>, + // Inner state stage: Stage, + // Out collector output: Output, - with_priority: bool, - + // Dependencies db: Arc, sync: Arc, } @@ -42,13 +47,25 @@ enum Stage { CreateObjects, } +/// Output from the `[ObjectProcessor]` task #[derive(Debug, Serialize, Deserialize, Default)] pub struct Output { + /// To send to frontend for priority reporting of new objects pub file_path_ids_with_new_object: Vec, + + /// Time elapsed fetching existing `objects` from db to be linked to `file_paths` pub fetch_existing_objects_time: Duration, + + /// Time spent linking `file_paths` to already existing `objects` pub assign_to_existing_object_time: Duration, + + /// Time spent creating new `objects` pub create_object_time: Duration, + + /// Number of new `objects` created pub created_objects_count: u64, + + /// Number of `objects` that were linked to `file_paths` pub linked_objects_count: u64, } @@ -112,7 +129,10 @@ impl Task for ObjectProcessor { Stage::AssignFilePathsToExistingObjects { existing_objects_by_cas_id, } => { - trace!("Assigning file paths to existing Objects"); + trace!( + existing_objects_to_link = existing_objects_by_cas_id.len(), + "Assigning file paths to existing Objects", + ); let start = Instant::now(); *linked_objects_count = assign_existing_objects_to_file_paths( file_paths_by_cas_id, @@ -223,7 +243,14 @@ where }) } - inner(cas_ids.into_iter().map(Into::into).collect::>(), db).await + let stringed_cas_ids = cas_ids.into_iter().map(Into::into).collect::>(); + + trace!( + cas_ids_count = stringed_cas_ids.len(), + "Fetching existing objects by cas_ids", + ); + + inner(stringed_cas_ids, db).await } /// Attempt to associate each file path with an object that has been @@ -313,10 +340,10 @@ impl SerializableTask for ObjectProcessor { with_priority, }| Self { id, + with_priority, file_paths_by_cas_id, stage, output, - with_priority, db, sync, }, diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index d8b439eb70b0..40a6e89f19f8 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -38,38 +38,43 @@ use itertools::Itertools; use serde::{Deserialize, Serialize}; use serde_json::json; use tokio::time::Instant; -use tracing::{debug, error, trace, warn}; +use 
tracing::{debug, error, instrument, trace, warn, Level}; use super::{ remove_non_existing_file_paths, reverse_update_directories_sizes, tasks::{ - saver::{SaveTask, SaveTaskOutput}, - updater::{UpdateTask, UpdateTaskOutput}, - walker::{WalkDirTask, WalkTaskOutput, WalkedEntry}, + self, saver, updater, + walker::{self, WalkedEntry}, }, update_directory_sizes, update_location_size, IsoFilePathFactory, WalkerDBProxy, BATCH_SIZE, }; #[derive(Debug)] pub struct Indexer { + // Received arguments location: location_with_indexer_rules::Data, sub_path: Option, - metadata: Metadata, + // Derived from received arguments iso_file_path_factory: IsoFilePathFactory, indexer_ruler: IndexerRuler, walker_root_path: Option>, + + // Inner state ancestors_needing_indexing: HashSet, ancestors_already_indexed: HashSet>, iso_paths_and_sizes: HashMap, u64>, + // Optimizations processing_first_directory: bool, - to_create_buffer: VecDeque, to_update_buffer: VecDeque, + // Run data + metadata: Metadata, errors: Vec, + // On shutdown data pending_tasks_on_resume: Vec>, tasks_for_shutdown: Vec>>, } @@ -95,7 +100,7 @@ impl Job for Indexer { let iso_file_path_factory = self.iso_file_path_factory.clone(); async move { match task_kind { - TaskKind::Walk => WalkDirTask::deserialize( + TaskKind::Walk => tasks::Walker::deserialize( &task_bytes, ( indexer_ruler.clone(), @@ -110,13 +115,13 @@ impl Job for Indexer { .await .map(IntoTask::into_task), - TaskKind::Save => SaveTask::deserialize( + TaskKind::Save => tasks::Saver::deserialize( &task_bytes, (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), ) .await .map(IntoTask::into_task), - TaskKind::Update => UpdateTask::deserialize( + TaskKind::Update => tasks::Updater::deserialize( &task_bytes, (Arc::clone(ctx.db()), Arc::clone(ctx.sync())), ) @@ -135,6 +140,16 @@ impl Job for Indexer { Ok(()) } + #[instrument( + skip_all, + fields( + location_id = self.location.id, + location_path = ?self.location.path, + sub_path = ?self.sub_path.as_ref().map(|path| path.display()), + ), + ret(level = Level::TRACE), + err, + )] async fn run( mut self, dispatcher: JobTaskDispatcher, @@ -205,7 +220,7 @@ impl Job for Indexer { update_location_size(location.id, ctx.db(), &ctx).await?; - metadata.db_write_time += start_size_update_time.elapsed(); + metadata.mean_db_write_time += start_size_update_time.elapsed(); } if metadata.removed_count > 0 { @@ -297,28 +312,28 @@ impl Indexer { )]) .await; - if any_task_output.is::() { + if any_task_output.is::() { return self .process_walk_output( *any_task_output - .downcast::() + .downcast::() .expect("just checked"), ctx, dispatcher, ) .await; - } else if any_task_output.is::() { + } else if any_task_output.is::() { self.process_save_output( *any_task_output - .downcast::() + .downcast::() .expect("just checked"), ctx, ) .await; - } else if any_task_output.is::() { + } else if any_task_output.is::() { self.process_update_output( *any_task_output - .downcast::() + .downcast::() .expect("just checked"), ctx, ) @@ -330,9 +345,22 @@ impl Indexer { Ok(Vec::new()) } + #[instrument( + skip_all, + fields( + to_create_count = to_create.len(), + to_update_count = to_update.len(), + to_remove_count = to_remove.len(), + accepted_ancestors_count = accepted_ancestors.len(), + directory_iso_file_path = %directory_iso_file_path.as_ref().display(), + more_walker_tasks_count = handles.len(), + %total_size, + ?scan_time, + ) + )] async fn process_walk_output( &mut self, - WalkTaskOutput { + walker::Output { to_create, to_update, to_remove, @@ -343,16 +371,20 @@ impl Indexer { 
mut handles, scan_time, .. - }: WalkTaskOutput, + }: walker::Output, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, ) -> Result>, indexer::Error> { - self.metadata.scan_read_time += scan_time; + self.metadata.mean_scan_read_time += scan_time; + // Initially the handles vec only have walker tasks, but we will add saver and updater tasks later + #[allow(clippy::cast_possible_truncation)] + // SAFETY: we know that `handles.len()` is a valid u32 as we wouldn't dispatch more than `u32::MAX` tasks + { + self.metadata.total_walk_tasks += handles.len() as u32; + } let (to_create_count, to_update_count) = (to_create.len(), to_update.len()); - debug!("Scanned {directory_iso_file_path} in {scan_time:?}"); - *self .iso_paths_and_sizes .entry(directory_iso_file_path) @@ -397,11 +429,12 @@ impl Indexer { self.errors.extend(errors); } - let db_delete_time = Instant::now(); - self.metadata.removed_count += - remove_non_existing_file_paths(to_remove, ctx.db(), ctx.sync()).await?; - self.metadata.db_write_time += db_delete_time.elapsed(); - + if !to_remove.is_empty() { + let db_delete_time = Instant::now(); + self.metadata.removed_count += + remove_non_existing_file_paths(to_remove, ctx.db(), ctx.sync()).await?; + self.metadata.mean_db_write_time += db_delete_time.elapsed(); + } let (save_tasks, update_tasks) = self.prepare_save_and_update_tasks(to_create, to_update, ctx); @@ -430,41 +463,43 @@ impl Indexer { Ok(handles) } + #[instrument(skip(self, ctx))] async fn process_save_output( &mut self, - SaveTaskOutput { + saver::Output { saved_count, save_duration, - }: SaveTaskOutput, + }: saver::Output, ctx: &impl JobContext, ) { self.metadata.indexed_count += saved_count; - self.metadata.db_write_time += save_duration; + self.metadata.mean_db_write_time += save_duration; ctx.progress_msg(format!("Saved {saved_count} files")).await; debug!( - "Processed save task in the indexer ({}/{}), took: {save_duration:?}", + "Processed save task in the indexer ({}/{})", self.metadata.completed_tasks, self.metadata.total_tasks ); } + #[instrument(skip(self, ctx))] async fn process_update_output( &mut self, - UpdateTaskOutput { + updater::Output { updated_count, update_duration, - }: UpdateTaskOutput, + }: updater::Output, ctx: &impl JobContext, ) { self.metadata.updated_count += updated_count; - self.metadata.db_write_time += update_duration; + self.metadata.mean_db_write_time += update_duration; ctx.progress_msg(format!("Updated {updated_count} files")) .await; debug!( - "Processed update task in the indexer ({}/{}), took: {update_duration:?}", + "Processed update task in the indexer ({}/{})", self.metadata.completed_tasks, self.metadata.total_tasks ); } @@ -494,7 +529,7 @@ impl Indexer { } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { - warn!("Task returned an empty output"); + warn!(%task_id, "Task returned an empty output"); } Ok(TaskStatus::Shutdown(task)) => { @@ -544,7 +579,7 @@ impl Indexer { pending_running_tasks.push( dispatcher - .dispatch(WalkDirTask::new_deep( + .dispatch(tasks::Walker::new_deep( walker_root_path.as_ref(), Arc::clone(&walker_root_path), self.indexer_ruler.clone(), @@ -559,6 +594,7 @@ impl Indexer { ); self.metadata.total_tasks = 1; + self.metadata.total_walk_tasks = 1; let updates = vec![ ProgressUpdate::TaskCount(self.metadata.total_tasks), @@ -596,11 +632,11 @@ impl Indexer { ); self.metadata.total_paths += self.to_create_buffer.len() as u64; - self.metadata.total_save_steps += 1; + self.metadata.total_save_tasks += 1; pending_running_tasks.push( dispatcher - 
.dispatch(SaveTask::new_deep( + .dispatch(tasks::Saver::new_deep( self.location.id, self.location.pub_id.clone(), self.to_create_buffer.drain(..).collect(), @@ -618,11 +654,11 @@ impl Indexer { ); self.metadata.total_updated_paths += self.to_update_buffer.len() as u64; - self.metadata.total_update_steps += 1; + self.metadata.total_update_tasks += 1; pending_running_tasks.push( dispatcher - .dispatch(UpdateTask::new_deep( + .dispatch(tasks::Updater::new_deep( self.to_update_buffer.drain(..).collect(), Arc::clone(ctx.db()), Arc::clone(ctx.sync()), @@ -656,9 +692,9 @@ impl Indexer { .map(|chunk| { let chunked_saves = chunk.collect::>(); self.metadata.total_paths += chunked_saves.len() as u64; - self.metadata.total_save_steps += 1; + self.metadata.total_save_tasks += 1; - SaveTask::new_deep( + tasks::Saver::new_deep( self.location.id, self.location.pub_id.clone(), chunked_saves, @@ -679,7 +715,7 @@ impl Indexer { to_create: Vec, to_update: Vec, ctx: &impl JobContext, - ) -> (Vec, Vec) { + ) -> (Vec, Vec) { if self.processing_first_directory { // If we are processing the first directory, we dispatch shallow tasks with higher priority // this way we provide a faster feedback loop to the user @@ -692,9 +728,9 @@ impl Indexer { .map(|chunk| { let chunked_saves = chunk.collect::>(); self.metadata.total_paths += chunked_saves.len() as u64; - self.metadata.total_save_steps += 1; + self.metadata.total_save_tasks += 1; - SaveTask::new_shallow( + tasks::Saver::new_shallow( self.location.id, self.location.pub_id.clone(), chunked_saves, @@ -711,9 +747,9 @@ impl Indexer { .map(|chunk| { let chunked_updates = chunk.collect::>(); self.metadata.total_updated_paths += chunked_updates.len() as u64; - self.metadata.total_update_steps += 1; + self.metadata.total_update_tasks += 1; - UpdateTask::new_shallow( + tasks::Updater::new_shallow( chunked_updates, Arc::clone(ctx.db()), Arc::clone(ctx.sync()), @@ -736,9 +772,9 @@ impl Indexer { .collect::>(); self.metadata.total_paths += chunked_saves.len() as u64; - self.metadata.total_save_steps += 1; + self.metadata.total_save_tasks += 1; - save_tasks.push(SaveTask::new_deep( + save_tasks.push(tasks::Saver::new_deep( self.location.id, self.location.pub_id.clone(), chunked_saves, @@ -765,9 +801,9 @@ impl Indexer { .collect::>(); self.metadata.total_updated_paths += chunked_updates.len() as u64; - self.metadata.total_update_steps += 1; + self.metadata.total_update_tasks += 1; - update_tasks.push(UpdateTask::new_deep( + update_tasks.push(tasks::Updater::new_deep( chunked_updates, Arc::clone(ctx.db()), Arc::clone(ctx.sync()), @@ -786,14 +822,15 @@ impl Indexer { #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct Metadata { - db_write_time: Duration, - scan_read_time: Duration, + mean_db_write_time: Duration, + mean_scan_read_time: Duration, total_tasks: u64, completed_tasks: u64, total_paths: u64, total_updated_paths: u64, - total_save_steps: u64, - total_update_steps: u64, + total_walk_tasks: u32, + total_save_tasks: u32, + total_update_tasks: u32, indexed_count: u64, updated_count: u64, removed_count: u64, @@ -802,32 +839,37 @@ pub struct Metadata { impl From for Vec { fn from( Metadata { - db_write_time, - scan_read_time, + mut mean_db_write_time, + mut mean_scan_read_time, total_tasks, completed_tasks, total_paths, total_updated_paths, - total_save_steps, - total_update_steps, + total_walk_tasks, + total_save_tasks, + total_update_tasks, indexed_count, updated_count, removed_count, }: Metadata, ) -> Self { + mean_scan_read_time /= total_walk_tasks; + 
mean_db_write_time /= total_save_tasks + total_update_tasks + 1; // +1 to update directories sizes + vec![ ReportOutputMetadata::Indexer { total_paths: u64_to_frontend(total_paths), }, ReportOutputMetadata::Metrics(HashMap::from([ - ("db_write_time".into(), json!(db_write_time)), - ("scan_read_time".into(), json!(scan_read_time)), + ("mean_scan_read_time".into(), json!(mean_scan_read_time)), + ("mean_db_write_time".into(), json!(mean_db_write_time)), ("total_tasks".into(), json!(total_tasks)), ("completed_tasks".into(), json!(completed_tasks)), ("total_paths".into(), json!(total_paths)), ("total_updated_paths".into(), json!(total_updated_paths)), - ("total_save_steps".into(), json!(total_save_steps)), - ("total_update_steps".into(), json!(total_update_steps)), + ("total_walk_tasks".into(), json!(total_walk_tasks)), + ("total_save_tasks".into(), json!(total_save_tasks)), + ("total_update_tasks".into(), json!(total_update_tasks)), ("indexed_count".into(), json!(indexed_count)), ("updated_count".into(), json!(updated_count)), ("removed_count".into(), json!(removed_count)), @@ -847,20 +889,20 @@ enum TaskKind { struct SaveState { location: location_with_indexer_rules::Data, sub_path: Option, - metadata: Metadata, iso_file_path_factory: IsoFilePathFactory, indexer_ruler: IndexerRuler, walker_root_path: Option>, + ancestors_needing_indexing: HashSet, ancestors_already_indexed: HashSet>, iso_paths_and_sizes: HashMap, u64>, processing_first_directory: bool, - to_create_buffer: VecDeque, to_update_buffer: VecDeque, + metadata: Metadata, errors: Vec, tasks_for_shutdown_bytes: Option, @@ -889,7 +931,6 @@ impl SerializableJob for Indexer { rmp_serde::to_vec_named(&SaveState { location, sub_path, - metadata, iso_file_path_factory, indexer_ruler, walker_root_path, @@ -899,29 +940,30 @@ impl SerializableJob for Indexer { processing_first_directory, to_create_buffer, to_update_buffer, + metadata, errors, tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named( &tasks_for_shutdown .into_iter() .map(|task| async move { if task - .is::>( + .is::>( ) { task - .downcast::>( + .downcast::>( ) .expect("just checked") .serialize() .await .map(|bytes| (TaskKind::Walk, bytes)) - } else if task.is::() { - task.downcast::() + } else if task.is::() { + task.downcast::() .expect("just checked") .serialize() .await .map(|bytes| (TaskKind::Save, bytes)) - } else if task.is::() { - task.downcast::() + } else if task.is::() { + task.downcast::() .expect("just checked") .serialize() .await @@ -945,7 +987,6 @@ impl SerializableJob for Indexer { let SaveState { location, sub_path, - metadata, iso_file_path_factory, indexer_ruler, walker_root_path, @@ -955,6 +996,7 @@ impl SerializableJob for Indexer { processing_first_directory, to_create_buffer, to_update_buffer, + metadata, errors, tasks_for_shutdown_bytes, } = rmp_serde::from_slice::(serialized_job)?; diff --git a/core/crates/heavy-lifting/src/indexer/mod.rs b/core/crates/heavy-lifting/src/indexer/mod.rs index a842934715b1..f682b45595a0 100644 --- a/core/crates/heavy-lifting/src/indexer/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/mod.rs @@ -31,7 +31,7 @@ use prisma_client_rust::{operator::or, QueryError, Select}; use rspc::ErrorCode; use serde::{Deserialize, Serialize}; use specta::Type; -use tracing::warn; +use tracing::{instrument, warn}; pub mod job; mod shallow; @@ -154,10 +154,12 @@ async fn update_directory_sizes( file_path::size_in_bytes_bytes::NAME, msgpack!(size_bytes), ), - db.file_path().update( - 
file_path::pub_id::equals(file_path.pub_id), - vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], - ), + db.file_path() + .update( + file_path::pub_id::equals(file_path.pub_id), + vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], + ) + .select(file_path::select!({ id })), )) }) .collect::, Error>>()? @@ -241,6 +243,14 @@ async fn remove_non_existing_file_paths( .map_err(Into::into) } +#[instrument( + skip(base_path, location_path, db, sync, errors), + fields( + base_path = %base_path.as_ref().display(), + location_path = %location_path.as_ref().display(), + ), + err, +)] #[allow(clippy::missing_panics_doc)] // Can't actually panic as we only deal with directories pub async fn reverse_update_directories_sizes( base_path: impl AsRef + Send, diff --git a/core/crates/heavy-lifting/src/indexer/shallow.rs b/core/crates/heavy-lifting/src/indexer/shallow.rs index aadf6662a51b..980318377dc2 100644 --- a/core/crates/heavy-lifting/src/indexer/shallow.rs +++ b/core/crates/heavy-lifting/src/indexer/shallow.rs @@ -18,18 +18,26 @@ use std::{ use futures_concurrency::future::TryJoin; use itertools::Itertools; -use tracing::{debug, warn}; +use tracing::{debug, instrument, warn}; use super::{ remove_non_existing_file_paths, reverse_update_directories_sizes, tasks::{ - saver::{SaveTask, SaveTaskOutput}, - updater::{UpdateTask, UpdateTaskOutput}, - walker::{ToWalkEntry, WalkDirTask, WalkTaskOutput, WalkedEntry}, + self, saver, updater, + walker::{self, ToWalkEntry, WalkedEntry}, }, update_directory_sizes, update_location_size, IsoFilePathFactory, WalkerDBProxy, BATCH_SIZE, }; +#[instrument( + skip_all, + fields( + location_id = location.id, + location_path = ?location.path, + sub_path = %sub_path.as_ref().display() + ) + err, +)] pub async fn shallow( location: location_with_indexer_rules::Data, sub_path: impl AsRef + Send, @@ -51,7 +59,7 @@ pub async fn shallow( .map_err(indexer::Error::from)?, ); - let Some(WalkTaskOutput { + let Some(walker::Output { to_create, to_update, to_remove, @@ -123,15 +131,19 @@ pub async fn shallow( Ok(errors) } +#[instrument( + skip_all, + fields(to_walk_path = %to_walk_path.display()) +)] async fn walk( location: &location_with_indexer_rules::Data, location_path: Arc, to_walk_path: Arc, db: Arc, dispatcher: &BaseTaskDispatcher, -) -> Result, Error> { +) -> Result, Error> { match dispatcher - .dispatch(WalkDirTask::new_shallow( + .dispatch(tasks::Walker::new_shallow( ToWalkEntry::from(&*to_walk_path), to_walk_path, location @@ -155,7 +167,7 @@ async fn walk( { sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => Ok(Some( *data - .downcast::() + .downcast::() .expect("we just dispatched this task"), )), sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => { @@ -192,7 +204,7 @@ async fn save_and_update( .chunks(BATCH_SIZE) .into_iter() .map(|chunk| { - SaveTask::new_shallow( + tasks::Saver::new_shallow( location.id, location.pub_id.clone(), chunk.collect::>(), @@ -207,7 +219,7 @@ async fn save_and_update( .chunks(BATCH_SIZE) .into_iter() .map(|chunk| { - UpdateTask::new_shallow( + tasks::Updater::new_shallow( chunk.collect::>(), Arc::clone(&db), Arc::clone(&sync), @@ -233,14 +245,14 @@ async fn save_and_update( { match task_status { sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => { - if data.is::() { + if data.is::() { metadata.indexed_count += data - .downcast::() + .downcast::() .expect("just checked") .saved_count; } else { metadata.updated_count += data - .downcast::() + .downcast::() .expect("just checked") 
.updated_count; } diff --git a/core/crates/heavy-lifting/src/indexer/tasks/mod.rs b/core/crates/heavy-lifting/src/indexer/tasks/mod.rs index eacba8f11b1a..0d2bf5f18cfa 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/mod.rs @@ -1,3 +1,7 @@ pub mod saver; pub mod updater; pub mod walker; + +pub use saver::Saver; +pub use updater::Updater; +pub use walker::Walker; diff --git a/core/crates/heavy-lifting/src/indexer/tasks/saver.rs b/core/crates/heavy-lifting/src/indexer/tasks/saver.rs index fae1c901271f..9466439564c7 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/saver.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/saver.rs @@ -16,127 +16,37 @@ use std::{sync::Arc, time::Duration}; use chrono::Utc; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::trace; +use tracing::{instrument, trace, Level}; use super::walker::WalkedEntry; #[derive(Debug)] -pub struct SaveTask { +pub struct Saver { + // Task control id: TaskId, - location_id: location::id::Type, - location_pub_id: location::pub_id::Type, - walked_entries: Vec, - db: Arc, - sync: Arc, is_shallow: bool, -} - -impl SaveTask { - #[must_use] - pub fn new_deep( - location_id: location::id::Type, - location_pub_id: location::pub_id::Type, - walked_entries: Vec, - db: Arc, - sync: Arc, - ) -> Self { - Self { - id: TaskId::new_v4(), - location_id, - location_pub_id, - walked_entries, - db, - sync, - is_shallow: false, - } - } - - #[must_use] - pub fn new_shallow( - location_id: location::id::Type, - location_pub_id: location::pub_id::Type, - walked_entries: Vec, - db: Arc, - sync: Arc, - ) -> Self { - Self { - id: TaskId::new_v4(), - location_id, - location_pub_id, - walked_entries, - db, - sync, - is_shallow: true, - } - } -} -#[derive(Debug, Serialize, Deserialize)] -struct SaveTaskSaveState { - id: TaskId, + // Received input args location_id: location::id::Type, location_pub_id: location::pub_id::Type, walked_entries: Vec, - is_shallow: bool, -} - -impl SerializableTask for SaveTask { - type SerializeError = rmp_serde::encode::Error; - - type DeserializeError = rmp_serde::decode::Error; - - type DeserializeCtx = (Arc, Arc); - - async fn serialize(self) -> Result, Self::SerializeError> { - let Self { - id, - location_id, - location_pub_id, - walked_entries, - is_shallow, - .. 
- } = self; - rmp_serde::to_vec_named(&SaveTaskSaveState { - id, - location_id, - location_pub_id, - walked_entries, - is_shallow, - }) - } - async fn deserialize( - data: &[u8], - (db, sync): Self::DeserializeCtx, - ) -> Result { - rmp_serde::from_slice(data).map( - |SaveTaskSaveState { - id, - location_id, - location_pub_id, - walked_entries, - is_shallow, - }| Self { - id, - location_id, - location_pub_id, - walked_entries, - db, - sync, - is_shallow, - }, - ) - } + // Dependencies + db: Arc, + sync: Arc, } +/// [`Save`] Task output #[derive(Debug)] -pub struct SaveTaskOutput { +pub struct Output { + /// Number of records inserted on database pub saved_count: u64, + /// Time spent saving records pub save_duration: Duration, } #[async_trait::async_trait] -impl Task for SaveTask { +impl Task for Saver { fn id(&self) -> TaskId { self.id } @@ -146,6 +56,18 @@ impl Task for SaveTask { self.is_shallow } + #[instrument( + skip_all, + fields( + task_id = %self.id, + location_id = %self.location_id, + to_save_count = %self.walked_entries.len(), + is_shallow = self.is_shallow, + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, _: &Interrupter) -> Result { use file_path::{ create_unchecked, date_created, date_indexed, date_modified, extension, hidden, inode, @@ -247,14 +169,115 @@ impl Task for SaveTask { .await .map_err(indexer::Error::from)? as u64; - trace!("Inserted {saved_count} records"); + let save_duration = start_time.elapsed(); + + trace!(saved_count, "Inserted records"); Ok(ExecStatus::Done( - SaveTaskOutput { + Output { saved_count, - save_duration: start_time.elapsed(), + save_duration, } .into_output(), )) } } + +impl Saver { + #[must_use] + pub fn new_deep( + location_id: location::id::Type, + location_pub_id: location::pub_id::Type, + walked_entries: Vec, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + location_id, + location_pub_id, + walked_entries, + db, + sync, + is_shallow: false, + } + } + + #[must_use] + pub fn new_shallow( + location_id: location::id::Type, + location_pub_id: location::pub_id::Type, + walked_entries: Vec, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + location_id, + location_pub_id, + walked_entries, + db, + sync, + is_shallow: true, + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct SaveState { + id: TaskId, + is_shallow: bool, + + location_id: location::id::Type, + location_pub_id: location::pub_id::Type, + walked_entries: Vec, +} + +impl SerializableTask for Saver { + type SerializeError = rmp_serde::encode::Error; + + type DeserializeError = rmp_serde::decode::Error; + + type DeserializeCtx = (Arc, Arc); + + async fn serialize(self) -> Result, Self::SerializeError> { + let Self { + id, + is_shallow, + location_id, + location_pub_id, + walked_entries, + .. 
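+			// `..` drops the `db` and `sync` handles: they are runtime dependencies rather
+			// than task state, and are re-injected from `DeserializeCtx` when the task is
+			// deserialized again on resume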
+ } = self; + rmp_serde::to_vec_named(&SaveState { + id, + is_shallow, + location_id, + location_pub_id, + walked_entries, + }) + } + + async fn deserialize( + data: &[u8], + (db, sync): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |SaveState { + id, + is_shallow, + location_id, + location_pub_id, + walked_entries, + }| Self { + id, + is_shallow, + location_id, + location_pub_id, + walked_entries, + db, + sync, + }, + ) + } +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/updater.rs b/core/crates/heavy-lifting/src/indexer/tasks/updater.rs index 68041fb72a23..b10d8e42b3d2 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/updater.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/updater.rs @@ -17,116 +17,38 @@ use std::{collections::HashSet, sync::Arc, time::Duration}; use serde::{Deserialize, Serialize}; use tokio::time::Instant; -use tracing::trace; +use tracing::{instrument, trace, Level}; use super::walker::WalkedEntry; #[derive(Debug)] -pub struct UpdateTask { +pub struct Updater { + // Task control id: TaskId, - walked_entries: Vec, - object_ids_that_should_be_unlinked: HashSet, - db: Arc, - sync: Arc, is_shallow: bool, -} - -impl UpdateTask { - #[must_use] - pub fn new_deep( - walked_entries: Vec, - db: Arc, - sync: Arc, - ) -> Self { - Self { - id: TaskId::new_v4(), - walked_entries, - db, - sync, - object_ids_that_should_be_unlinked: HashSet::new(), - is_shallow: false, - } - } - #[must_use] - pub fn new_shallow( - walked_entries: Vec, - db: Arc, - sync: Arc, - ) -> Self { - Self { - id: TaskId::new_v4(), - walked_entries, - db, - sync, - object_ids_that_should_be_unlinked: HashSet::new(), - is_shallow: true, - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -struct UpdateTaskSaveState { - id: TaskId, + // Received input args walked_entries: Vec, - object_ids_that_should_be_unlinked: HashSet, - is_shallow: bool, -} -impl SerializableTask for UpdateTask { - type SerializeError = rmp_serde::encode::Error; - - type DeserializeError = rmp_serde::decode::Error; - - type DeserializeCtx = (Arc, Arc); - - async fn serialize(self) -> Result, Self::SerializeError> { - let Self { - id, - walked_entries, - object_ids_that_should_be_unlinked, - is_shallow, - .. 
- } = self; - - rmp_serde::to_vec_named(&UpdateTaskSaveState { - id, - walked_entries, - object_ids_that_should_be_unlinked, - is_shallow, - }) - } + // Inner state + object_ids_that_should_be_unlinked: HashSet, - async fn deserialize( - data: &[u8], - (db, sync): Self::DeserializeCtx, - ) -> Result { - rmp_serde::from_slice(data).map( - |UpdateTaskSaveState { - id, - walked_entries, - object_ids_that_should_be_unlinked, - is_shallow, - }| Self { - id, - walked_entries, - object_ids_that_should_be_unlinked, - db, - sync, - is_shallow, - }, - ) - } + // Dependencies + db: Arc, + sync: Arc, } +/// [`Update`] Task output #[derive(Debug)] -pub struct UpdateTaskOutput { +pub struct Output { + /// Number of records updated on database pub updated_count: u64, + /// Time spent updating records pub update_duration: Duration, } #[async_trait::async_trait] -impl Task for UpdateTask { +impl Task for Updater { fn id(&self) -> TaskId { self.id } @@ -136,6 +58,17 @@ impl Task for UpdateTask { self.is_shallow } + #[instrument( + skip_all, + fields( + task_id = %self.id, + to_update_count = %self.walked_entries.len(), + is_shallow = self.is_shallow, + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, interrupter: &Interrupter) -> Result { use file_path::{ cas_id, date_created, date_modified, hidden, inode, is_dir, object, object_id, @@ -217,6 +150,7 @@ impl Task for UpdateTask { .collect::>(), db.file_path() .update(file_path::pub_id::equals(pub_id.into()), db_params) + // selecting id to avoid fetching whole object from database .select(file_path::select!({ id })), ) }, @@ -231,18 +165,54 @@ impl Task for UpdateTask { .await .map_err(indexer::Error::from)?; - trace!("Updated {updated:?} records"); + let update_duration = start_time.elapsed(); + + trace!(?updated, "Updated records"); Ok(ExecStatus::Done( - UpdateTaskOutput { + Output { updated_count: updated.len() as u64, - update_duration: start_time.elapsed(), + update_duration, } .into_output(), )) } } +impl Updater { + #[must_use] + pub fn new_deep( + walked_entries: Vec, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + walked_entries, + db, + sync, + object_ids_that_should_be_unlinked: HashSet::new(), + is_shallow: false, + } + } + + #[must_use] + pub fn new_shallow( + walked_entries: Vec, + db: Arc, + sync: Arc, + ) -> Self { + Self { + id: TaskId::new_v4(), + walked_entries, + db, + sync, + object_ids_that_should_be_unlinked: HashSet::new(), + is_shallow: true, + } + } +} + async fn fetch_objects_ids_to_unlink( walked_entries: &[WalkedEntry], object_ids_that_should_be_unlinked: &mut HashSet, @@ -276,3 +246,59 @@ async fn fetch_objects_ids_to_unlink( Ok(()) } + +#[derive(Debug, Serialize, Deserialize)] +struct SaveState { + id: TaskId, + is_shallow: bool, + + walked_entries: Vec, + + object_ids_that_should_be_unlinked: HashSet, +} + +impl SerializableTask for Updater { + type SerializeError = rmp_serde::encode::Error; + + type DeserializeError = rmp_serde::decode::Error; + + type DeserializeCtx = (Arc, Arc); + + async fn serialize(self) -> Result, Self::SerializeError> { + let Self { + id, + walked_entries, + object_ids_that_should_be_unlinked, + is_shallow, + .. 
+ } = self; + + rmp_serde::to_vec_named(&SaveState { + id, + is_shallow, + walked_entries, + object_ids_that_should_be_unlinked, + }) + } + + async fn deserialize( + data: &[u8], + (db, sync): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |SaveState { + id, + is_shallow, + walked_entries, + object_ids_that_should_be_unlinked, + }| Self { + id, + is_shallow, + walked_entries, + object_ids_that_should_be_unlinked, + db, + sync, + }, + ) + } +} diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs index bc777de76f1f..66e898d0e687 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs @@ -34,7 +34,7 @@ use chrono::{DateTime, Duration as ChronoDuration, FixedOffset}; use futures_concurrency::future::Join; use tokio::{fs, time::Instant}; use tokio_stream::{wrappers::ReadDirStream, StreamExt}; -use tracing::{instrument, trace}; +use tracing::{instrument, trace, Level}; mod entry; mod metadata; @@ -70,43 +70,65 @@ pub trait WalkerDBProxy: Clone + Send + Sync + fmt::Debug + 'static { } #[derive(Debug)] -pub struct WalkDirTask> +pub struct Walker> where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, Dispatcher: TaskDispatcher, { + // Task control id: TaskId, + is_shallow: bool, + + // Received input args entry: ToWalkEntry, root: Arc, entry_iso_file_path: IsolatedFilePathData<'static>, indexer_ruler: IndexerRuler, + + // Inner state + stage: WalkerStage, + + // Dependencies iso_file_path_factory: IsoPathFactory, db_proxy: DBProxy, - stage: WalkerStage, maybe_dispatcher: Option, + + // Non critical errors that happened during the task execution errors: Vec, + + // Time spent walking through the received directory scan_time: Duration, - is_shallow: bool, } +/// [`Walker`] Task output #[derive(Debug)] -pub struct WalkTaskOutput { +pub struct Output { + /// Entries found in the file system that need to be created in database pub to_create: Vec, + /// Entries found in the file system that need to be updated in database pub to_update: Vec, + /// Entries found in the file system that need to be removed from database pub to_remove: Vec, + /// Entries found in the file system that will not be indexed pub non_indexed_paths: Vec, + /// Ancestors of entries that were indexed pub accepted_ancestors: HashSet, + /// Errors that happened during the task execution pub errors: Vec, + /// Directory that was indexed pub directory_iso_file_path: IsolatedFilePathData<'static>, + /// Total size of the directory that was indexed pub total_size: u64, + /// Task handles that were dispatched to run `WalkDir` tasks for inner directories pub handles: Vec>, + /// Time spent walking through the received directory pub scan_time: Duration, } #[async_trait::async_trait] impl Task - for WalkDirTask + for Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, @@ -121,7 +143,17 @@ where self.is_shallow } - #[instrument(skip(self, interrupter), fields(task_id = %self.id, walked_entry = %self.entry.path.display()))] + #[instrument( + skip_all, + fields( + task_id = %self.id, + walked_entry = %self.entry.path.display(), + is_shallow = self.is_shallow, + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, interrupter: &Interrupter) -> Result { let Self { root, @@ -158,6 +190,7 @@ where if let Some(rules) = 
GitIgnoreRules::get_rules_if_in_git_repo(root.as_ref(), path).await { + trace!("Found gitignore rules to follow"); indexer_ruler.extend(rules.map(Into::into)); } } @@ -349,7 +382,7 @@ where // Taking out some data as the task is finally complete Ok(ExecStatus::Done( - WalkTaskOutput { + Output { to_create, to_update, to_remove, @@ -398,7 +431,7 @@ enum WalkerStage { }, } -impl WalkDirTask +impl Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, @@ -430,7 +463,7 @@ where } } -impl WalkDirTask> +impl Walker> where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, @@ -460,6 +493,11 @@ where } } +#[instrument( + skip_all, + fields(entries_count = walking_entries.len()), + err, +)] async fn segregate_creates_and_updates( walking_entries: &mut Vec, db_proxy: &impl WalkerDBProxy, @@ -483,58 +521,67 @@ async fn segregate_creates_and_updates( .collect::>(); Ok(walking_entries.drain(..).fold( - (Vec::new(), Vec::new(), 0), - |(mut to_create, mut to_update, mut total_size), entry| { - let WalkingEntry{iso_file_path, metadata} = &entry; - - total_size += metadata.size_in_bytes; - - if let Some(file_path) = iso_paths_already_in_db.get(iso_file_path) { - if let (Some(inode), Some(date_modified)) = ( - &file_path.inode, - &file_path.date_modified, - ) { + (Vec::new(), Vec::new(), 0), + |(mut to_create, mut to_update, mut total_size), entry| { + let WalkingEntry { + iso_file_path, + metadata, + } = &entry; + + total_size += metadata.size_in_bytes; + + if let Some(file_path) = iso_paths_already_in_db.get(iso_file_path) { + if let (Some(inode), Some(date_modified)) = + (&file_path.inode, &file_path.date_modified) + { if ( - inode_from_db(&inode[0..8]) != metadata.inode - // Datetimes stored in DB loses a bit of precision, so we need to check against a delta - // instead of using != operator - || DateTime::::from(metadata.modified_at) - *date_modified - > ChronoDuration::milliseconds(1) || file_path.hidden.is_none() || metadata.hidden != file_path.hidden.unwrap_or_default() - ) - // We ignore the size of directories because it is not reliable, we need to - // calculate it ourselves later - && !( - iso_file_path.to_parts().is_dir - && metadata.size_in_bytes - != file_path - .size_in_bytes_bytes - .as_ref() - .map(|size_in_bytes_bytes| { - u64::from_be_bytes([ - size_in_bytes_bytes[0], - size_in_bytes_bytes[1], - size_in_bytes_bytes[2], - size_in_bytes_bytes[3], - size_in_bytes_bytes[4], - size_in_bytes_bytes[5], - size_in_bytes_bytes[6], - size_in_bytes_bytes[7], - ]) - }) - .unwrap_or_default() + inode_from_db(&inode[0..8]) != metadata.inode + // Datetimes stored in DB loses a bit of precision, + // so we need to check against a delta + // instead of using != operator + || ( + DateTime::::from(metadata.modified_at) - *date_modified + > ChronoDuration::milliseconds(1) + ) + || file_path.hidden.is_none() + || metadata.hidden != file_path.hidden.unwrap_or_default() + ) + // We ignore the size of directories because it is not reliable, we need to + // calculate it ourselves later + && !( + iso_file_path.to_parts().is_dir + && metadata.size_in_bytes + != file_path + .size_in_bytes_bytes + .as_ref() + .map(|size_in_bytes_bytes| { + u64::from_be_bytes([ + size_in_bytes_bytes[0], + size_in_bytes_bytes[1], + size_in_bytes_bytes[2], + size_in_bytes_bytes[3], + size_in_bytes_bytes[4], + size_in_bytes_bytes[5], + size_in_bytes_bytes[6], + size_in_bytes_bytes[7], + ]) + }) + .unwrap_or_default() ) { - to_update.push( - WalkedEntry::from((&file_path.pub_id, file_path.object_id, entry)), - 
); + to_update.push(WalkedEntry::from(( + &file_path.pub_id, + file_path.object_id, + entry, + ))); } } - } else { - to_create.push(WalkedEntry::from(entry)); - } - - (to_create, to_update, total_size) + } else { + to_create.push(WalkedEntry::from(entry)); } - )) + + (to_create, to_update, total_size) + }, + )) } } @@ -553,7 +600,7 @@ async fn keep_walking( to_keep_walking .drain(..) .map(|entry| { - WalkDirTask::new_deep( + Walker::new_deep( entry, Arc::clone(root), indexer_ruler.clone(), @@ -832,7 +879,7 @@ mod tests { let handle = system .dispatch( - WalkDirTask::new_deep( + Walker::new_deep( root_path.to_path_buf(), Arc::new(root_path.to_path_buf()), indexer_ruler, @@ -861,13 +908,13 @@ mod tests { panic!("unexpected task output") }; - let WalkTaskOutput { + let Output { to_create, accepted_ancestors, errors, handles, .. - } = *output.downcast::().unwrap(); + } = *output.downcast::().unwrap(); assert!(errors.is_empty(), "errors: {errors:#?}"); diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs index 50d86c9673b7..d5fdc72b8f3f 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs @@ -17,19 +17,22 @@ use serde::{Deserialize, Serialize}; use super::{ entry::{ToWalkEntry, WalkingEntry}, metadata::InnerMetadata, - IsoFilePathFactory, WalkDirTask, WalkedEntry, WalkerDBProxy, WalkerStage, + IsoFilePathFactory, WalkedEntry, Walker, WalkerDBProxy, WalkerStage, }; #[derive(Debug, Serialize, Deserialize)] pub(super) struct WalkDirSaveState { id: TaskId, + is_shallow: bool, + entry: ToWalkEntry, root: Arc, entry_iso_file_path: IsolatedFilePathData<'static>, + stage: WalkerStageSaveState, + errors: Vec, scan_time: Duration, - is_shallow: bool, } #[derive(Debug, Serialize, Deserialize)] @@ -152,7 +155,7 @@ impl From for WalkerStage { } impl SerializableTask - for WalkDirTask + for Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, @@ -176,13 +179,13 @@ where } = self; rmp_serde::to_vec_named(&WalkDirSaveState { id, + is_shallow, entry, root, entry_iso_file_path, stage: stage.into(), errors, scan_time, - is_shallow, }) } @@ -209,7 +212,7 @@ where iso_file_path_factory, db_proxy, stage: stage.into(), - maybe_dispatcher: is_shallow.then_some(dispatcher), + maybe_dispatcher: (!is_shallow).then_some(dispatcher), errors, scan_time, is_shallow, diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index 0b019d786070..cbab3278bdbf 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -31,7 +31,7 @@ use tokio::{ spawn, sync::{watch, Mutex}, }; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, instrument, trace, warn}; use uuid::Uuid; use super::{ @@ -230,6 +230,13 @@ pub struct JobOutput { } impl JobOutput { + #[instrument( + skip_all, + fields( + name = %report.name, + non_critical_errors_count = non_critical_errors.len(), + ) + )] pub fn prepare_output_and_report( JobReturn { data, @@ -240,14 +247,14 @@ impl JobOutput { ) -> Self { if non_critical_errors.is_empty() { report.status = Status::Completed; - debug!("Job completed", report.id, report.name); + debug!("Job completed"); } else { report.status = Status::CompletedWithErrors; report.non_critical_errors.extend(non_critical_errors); warn!( - "Job completed with errors: {:#?}", - report.id, 
report.name, report.non_critical_errors + non_critical_errors = ?report.non_critical_errors, + "Job completed with errors", ); } @@ -271,7 +278,7 @@ impl JobOutput { #[derive(Debug, Serialize, Type)] pub enum JobOutputData { Empty, - // TODO: Add more types + // TODO: Add more types as needed } pub struct JobEnqueuer @@ -370,13 +377,11 @@ pub struct JobHandle> { } impl> JobHandle { + #[instrument(skip(self), fields(id = %self.id), err)] pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> { - trace!( - "Handle sending command {command:?} to ", - self.id - ); + trace!("JobHandle sending command"); if self.commands_tx.send(command).await.is_err() { - warn!("Tried to send a {command:?} to a job that was already completed"); + warn!("Tried to send command to a job that was already completed"); Ok(()) } else { @@ -384,7 +389,7 @@ impl> JobHandle Result<(), JobSystemError> { + async fn command_children(&mut self, command: Command) -> Result<(), JobSystemError> { let (new_status, completed_at) = match command { Command::Pause => (Status::Paused, None), Command::Resume => return Ok(()), @@ -399,8 +404,8 @@ impl> JobHandle", - next_job_report.id + %next_job_report.id, + "Parent job sent command to children job", ); next_job_report.update(self.ctx.db()).await @@ -412,11 +417,12 @@ impl> JobHandle, ) -> Result<(), JobSystemError> { - trace!("Handle registering start of ", self.id); + trace!("JobHandle registering start of job"); let Self { next_jobs, ctx, .. } = self; @@ -443,8 +449,8 @@ impl> JobHandle", - next_job_report.id + %next_job_report.id, + "Parent job registering children ", ); if next_job_report.created_at.is_none() { next_job_report.create(db).await @@ -459,6 +465,14 @@ impl> JobHandle> JobHandle", self.id); + trace!("JobHandle completing"); let output = JobOutput::prepare_output_and_report(job_return, &mut report); report.update(ctx.db()).await?; - trace!("Handle completed ", self.id); + trace!("JobHandle completed"); Ok(output) } + #[instrument( + skip(self), + fields( + id = %self.id, + ), + err + )] pub async fn failed_job(&mut self, e: &Error) -> Result<(), JobSystemError> { - trace!("Handle registering failed job ", self.id); + trace!("JobHandle registering failed job"); let db = self.ctx.db(); { let mut report = self.ctx.report_mut().await; error!( - "Job failed with a critical error: {e:#?};", - report.id, report.name + job_name = %report.name, + "Job failed with a critical error", ); report.status = Status::Failed; @@ -497,17 +518,21 @@ impl> JobHandle", - self.id - ); + trace!("JobHandle sending cancel command to children due to failure"); self.command_children(Command::Cancel).await } - // TODO usar essa caralha + #[instrument( + skip(self), + fields( + id = %self.id, + ), + err + )] + // TODO use this pub async fn shutdown_pause_job(&mut self) -> Result<(), JobSystemError> { - trace!("Handle pausing job on shutdown: ", self.id); + trace!("JobHandle pausing job on shutdown"); let db = self.ctx.db(); @@ -515,8 +540,8 @@ impl> JobHandle paused due to system shutdown, we will pause all children jobs", - report.id, report.name + job_name = %report.name, + "Job paused due to system shutdown, we will pause all children jobs", ); report.status = Status::Paused; @@ -527,16 +552,23 @@ impl> JobHandle Result<(), JobSystemError> { - trace!("Handle canceling job: ", self.id); + trace!("JobHandle canceling job"); let db = self.ctx.db(); { let mut report = self.ctx.report_mut().await; info!( - "Job canceled, we will cancel all children jobs", - report.id, 
report.name + job_name = %report.name, + "Job canceled, we will cancel all children jobs", ); report.status = Status::Canceled; @@ -545,10 +577,7 @@ impl> JobHandle", - self.id - ); + trace!("JobHandle sending cancel command to children"); self.command_children(Command::Cancel).await } @@ -626,6 +655,7 @@ where self.job.serialize().await } + #[instrument(skip_all, fields(id = %self.id))] fn dispatch( self: Box, base_dispatcher: BaseTaskDispatcher, @@ -636,9 +666,9 @@ where let ctx = JobCtx::new(self.report, ctx); - trace!("Dispatching job ", self.id); + trace!("Dispatching job"); - spawn(to_spawn_job::( + spawn(to_spawn_job::( self.id, self.job, ctx.clone(), @@ -656,6 +686,13 @@ where } } + #[instrument( + skip_all, + fields( + id = %self.id, + has_serialized_tasks = %serialized_tasks.is_some(), + ) + )] fn resume( self: Box, base_dispatcher: BaseTaskDispatcher, @@ -667,9 +704,9 @@ where let ctx = JobCtx::new(self.report, ctx); - trace!("Resuming job ", self.id); + trace!("Resuming job"); - spawn(to_spawn_job::( + spawn(to_spawn_job::( self.id, self.job, ctx.clone(), @@ -688,15 +725,20 @@ where } } -async fn to_spawn_job>( +#[instrument(skip_all, fields(id = %id, name = %J::NAME))] +async fn to_spawn_job( id: JobId, - mut job: impl Job, + mut job: J, ctx: JobCtx, existing_tasks: Option, base_dispatcher: BaseTaskDispatcher, commands_rx: chan::Receiver, done_tx: chan::Sender<(JobId, Result)>, -) { +) where + OuterCtx: OuterContext, + JobCtx: JobContext, + J: Job, +{ enum StreamMessage { Commands(Command), NewRemoteController(TaskRemoteController), @@ -738,7 +780,7 @@ async fn to_spawn_job>( match command { Command::Pause => { - trace!("Pausing job ", id); + trace!("Pausing job"); running_state_tx.send_modify(|state| *state = JobRunningState::Paused); remote_controllers .iter() @@ -756,7 +798,7 @@ async fn to_spawn_job>( }); } Command::Resume => { - trace!("Resuming job ", id); + trace!("Resuming job"); running_state_tx.send_modify(|state| *state = JobRunningState::Running); remote_controllers @@ -775,7 +817,7 @@ async fn to_spawn_job>( }); } Command::Cancel => { - trace!("Canceling job ", id); + trace!("Canceling job"); remote_controllers .iter() .map(TaskRemoteController::cancel) @@ -792,7 +834,7 @@ async fn to_spawn_job>( } StreamMessage::Done(res) => { - trace!("Job done", id); + trace!("Job done"); #[cfg(debug_assertions)] { // Just a sanity check to make sure we don't have any pending tasks left diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs index b50133638645..ae5fe256a2e4 100644 --- a/core/crates/heavy-lifting/src/job_system/mod.rs +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -92,7 +92,7 @@ impl> JobSystem"); + warn!(%ctx_id, "Found stored jobs for a database that doesn't exist anymore"); None }, |ctx| Some((entries, ctx.clone())), @@ -325,7 +325,7 @@ async fn load_stored_job_entries> JobSystemRunner> JobSystemRunner> JobSystemRunner"); + debug!(%name, "Job was shutdown and serialized"); } Ok(None) => { debug!( "Job was shutdown but didn't returned any serialized data, \ - probably it isn't resumable job: " + probably it isn't resumable job" ); } Err(e) => { - error!("Failed to serialize job: {e:#?}"); + error!(?e, "Failed to serialize job"); } } @@ -457,6 +457,7 @@ impl> JobSystemRunner>( parent_job_id: JobId, parent_job_name: JobName, @@ -478,10 +479,7 @@ async fn serialize_next_jobs_to_shutdown: {e:#?}" - ); + error!(%next_id, %next_name, ?e, "Failed to serialize next job"); }) }) .collect::>() @@ -493,6 
+491,15 @@ async fn serialize_next_jobs_to_shutdown>( handle: &mut JobHandle, location_id: location::id::Type, @@ -514,10 +521,8 @@ async fn try_dispatch_next_job", - next.job_name() - ); + trace!(%next_id, %next_name, "Dispatching next job"); + job_hashes_by_id.insert(next_id, next_hash); running_jobs_by_job_id.insert(next_id, (next_name, location_id)); running_jobs_set.insert((next_name, location_id)); @@ -538,7 +543,7 @@ async fn try_dispatch_next_job", next.job_name()); + warn!(%next_id, %next_name, "Unexpectedly found a job with the same hash as the next job"); } } else { trace!("No next jobs to dispatch"); @@ -638,19 +643,19 @@ pub(super) async fn run>( let mut job_return_status_stream = pin!(job_return_status_rx_to_shutdown); debug!( - "Waiting for {} jobs to shutdown before shutting down the job system...", - runner.total_jobs() + total_jobs = runner.total_jobs(), + "Waiting for jobs to shutdown before shutting down the job system...", ); while let Some((job_id, status)) = job_return_status_stream.next().await { if let Err(e) = runner.process_return_status(job_id, status).await { - error!("Failed to process return status before shutting down: {e:#?}"); + error!(?e, "Failed to process return status before shutting down"); } } // Now the runner can shutdown if let Err(e) = runner.save_jobs(store_jobs_file).await { - error!("Failed to save jobs before shutting down: {e:#?}"); + error!(?e, "Failed to save jobs before shutting down"); } } diff --git a/crates/task-system/src/system.rs b/crates/task-system/src/system.rs index c0629c45f714..0d78f9902ede 100644 --- a/crates/task-system/src/system.rs +++ b/crates/task-system/src/system.rs @@ -451,7 +451,7 @@ pub trait Dispatcher: fmt::Debug + Clone + Send + Sync + 'static { into_tasks: I, ) -> impl Future>> + Send where - ::IntoIter: Send, + I::IntoIter: Send, { self.dispatch_many_boxed(into_tasks.into_iter().map(IntoTask::into_task)) } From a21e6907f42106a89d2e6581d5677d5741c6fbac Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Wed, 29 May 2024 03:24:27 -0300 Subject: [PATCH 22/33] Fixed some bugs and did more polishing --- .../heavy-lifting/src/file_identifier/job.rs | 29 +- .../src/file_identifier/tasks/identifier.rs | 3 +- .../src/file_identifier/tasks/mod.rs | 55 +- .../file_identifier/tasks/object_processor.rs | 82 ++- core/crates/heavy-lifting/src/indexer/job.rs | 19 +- .../heavy-lifting/src/job_system/job.rs | 11 + .../heavy-lifting/src/job_system/runner.rs | 15 +- .../heavy-lifting/src/job_system/store.rs | 4 + core/crates/heavy-lifting/src/lib.rs | 10 +- .../helpers/exif_media_data.rs | 10 +- .../helpers/ffmpeg_media_data.rs | 80 +-- .../media_processor/helpers/thumbnailer.rs | 194 ++++--- .../heavy-lifting/src/media_processor/job.rs | 491 ++++++++++-------- .../heavy-lifting/src/media_processor/mod.rs | 59 ++- .../src/media_processor/shallow.rs | 62 +-- .../tasks/media_data_extractor.rs | 371 +++++++------ .../src/media_processor/tasks/thumbnailer.rs | 132 +++-- .../heavy-lifting/src/utils/sub_path.rs | 2 +- core/src/location/manager/watcher/utils.rs | 31 +- core/src/location/mod.rs | 2 +- packages/client/src/core.ts | 2 +- 21 files changed, 973 insertions(+), 691 deletions(-) diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index edb2240edb04..874d2f1d2399 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -409,6 +409,7 @@ impl FileIdentifier { ) -> Vec> { 
self.metadata.mean_extract_metadata_time += extract_metadata_time; self.metadata.mean_save_db_time_on_identifier_tasks += save_db_time; + self.metadata.total_identified_files += total_identified_files; self.metadata.created_objects_count += created_objects_count; let file_paths_with_new_object_to_report = file_path_ids_with_new_object @@ -434,8 +435,8 @@ impl FileIdentifier { ctx.progress(vec![ ProgressUpdate::CompletedTaskCount(u64::from(self.metadata.completed_identifier_tasks)), ProgressUpdate::Message(format!( - "Identified {total_identified_files} of {} files", - self.metadata.total_found_orphans + "Identified {} of {} files", + self.metadata.total_identified_files, self.metadata.total_found_orphans )), ]) .await; @@ -571,6 +572,15 @@ impl FileIdentifier { self.metadata.total_identifier_tasks += 1; + ctx.progress(vec![ + ProgressUpdate::TaskCount(u64::from(self.metadata.total_identifier_tasks)), + ProgressUpdate::Message(format!( + "Found {} orphan paths", + self.metadata.total_found_orphans + )), + ]) + .await; + pending_running_tasks.push( dispatcher .dispatch(tasks::Identifier::new( @@ -636,6 +646,15 @@ impl FileIdentifier { self.metadata.total_identifier_tasks += 1; + ctx.progress(vec![ + ProgressUpdate::TaskCount(u64::from(self.metadata.total_identifier_tasks)), + ProgressUpdate::Message(format!( + "Found {} orphan paths", + self.metadata.total_found_orphans + )), + ]) + .await; + pending_running_tasks.push( dispatcher .dispatch(tasks::Identifier::new( @@ -686,6 +705,7 @@ pub struct Metadata { mean_create_object_time: Duration, seeking_orphans_time: Duration, total_found_orphans: u64, + total_identified_files: u64, created_objects_count: u64, linked_objects_count: u64, total_identifier_tasks: u32, @@ -704,6 +724,7 @@ impl From for Vec { mut mean_create_object_time, seeking_orphans_time, total_found_orphans, + total_identified_files, created_objects_count, linked_objects_count, total_identifier_tasks, @@ -748,6 +769,10 @@ impl From for Vec { ), ("seeking_orphans_time".into(), json!(seeking_orphans_time)), ("total_found_orphans".into(), json!(total_found_orphans)), + ( + "total_identified_files".into(), + json!(total_identified_files), + ), ("created_objects_count".into(), json!(created_objects_count)), ("linked_objects_count".into(), json!(linked_objects_count)), ( diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs index b7bc9f2177c9..2eb0b29febcd 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs @@ -262,7 +262,8 @@ impl Task for Identifier { output.save_db_time = start_time.elapsed(); output.created_objects_count = file_path_ids_with_new_object.len() as u64; - output.file_path_ids_with_new_object = file_path_ids_with_new_object; + output.file_path_ids_with_new_object = + file_path_ids_with_new_object.into_keys().collect(); output.file_paths_by_cas_id = identified_files.drain().fold( HashMap::new(), diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs index be0694bddf12..e1751eeb4d1e 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/mod.rs @@ -11,6 +11,8 @@ use sd_prisma::{ use sd_sync::{CRDTOperation, OperationFactory}; use sd_utils::msgpack; +use std::collections::{HashMap, HashSet}; + use chrono::{DateTime, FixedOffset}; use 
prisma_client_rust::Select; use serde::{Deserialize, Serialize}; @@ -67,16 +69,16 @@ async fn create_objects_and_update_file_paths( files_and_kinds: impl IntoIterator + Send, db: &PrismaClient, sync: &SyncManager, -) -> Result, file_identifier::Error> { +) -> Result, file_identifier::Error> { trace!("Preparing objects"); - let (object_create_args, file_path_update_args) = files_and_kinds + let (object_create_args, file_path_args) = files_and_kinds .into_iter() .map( |FilePathToCreateOrLinkObject { + id, file_path_pub_id, - created_at, kind, - .. + created_at, }| { let object_pub_id = ObjectPubId::new(); @@ -105,12 +107,20 @@ async fn create_objects_and_update_file_paths( ), object::create_unchecked(object_pub_id.to_db(), db_params), ), - connect_file_path_to_object(&file_path_pub_id, &object_pub_id, db, sync), + ( + (id, object_pub_id.clone()), + connect_file_path_to_object(&file_path_pub_id, &object_pub_id, db, sync), + ), ) }, ) .unzip::<_, _, Vec<_>, Vec<_>>(); + let (mut object_pub_id_by_file_path_id, file_path_update_args) = file_path_args + .into_iter() + .unzip::<_, _, HashMap<_, _>, Vec<_>>( + ); + trace!( new_objects_count = object_create_args.len(), "Creating new Objects!", @@ -135,22 +145,27 @@ async fn create_objects_and_update_file_paths( if created_objects_count > 0 { trace!("Updating file paths with created objects"); - sync.write_ops( - db, - file_path_update_args - .into_iter() - .unzip::<_, _, Vec<_>, Vec<_>>(), - ) - .await - .map(|file_paths| { - file_paths - .into_iter() - .map(|file_path_id::Data { id }| id) - .collect() - }) - .map_err(Into::into) + let updated_file_path_ids = sync + .write_ops( + db, + file_path_update_args + .into_iter() + .unzip::<_, _, Vec<_>, Vec<_>>(), + ) + .await + .map(|file_paths| { + file_paths + .into_iter() + .map(|file_path_id::Data { id }| id) + .collect::>() + })?; + + object_pub_id_by_file_path_id + .retain(|file_path_id, _| updated_file_path_ids.contains(file_path_id)); + + Ok(object_pub_id_by_file_path_id) } else { trace!("No objects created, skipping file path updates"); - Ok(vec![]) + Ok(HashMap::new()) } } diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs index 139bb30273e3..601669a9a999 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs @@ -1,6 +1,6 @@ use crate::{file_identifier, Error}; -use sd_core_prisma_helpers::{object_for_file_identifier, CasId, ObjectPubId}; +use sd_core_prisma_helpers::{file_path_id, object_for_file_identifier, CasId, ObjectPubId}; use sd_core_sync::Manager as SyncManager; use sd_prisma::prisma::{file_path, object, PrismaClient}; @@ -134,7 +134,7 @@ impl Task for ObjectProcessor { "Assigning file paths to existing Objects", ); let start = Instant::now(); - *linked_objects_count = assign_existing_objects_to_file_paths( + let more_file_path_ids_with_new_object = assign_existing_objects_to_file_paths( file_paths_by_cas_id, existing_objects_by_cas_id, db, @@ -142,6 +142,8 @@ impl Task for ObjectProcessor { ) .await?; *assign_to_existing_object_time = start.elapsed(); + file_path_ids_with_new_object.extend(more_file_path_ids_with_new_object); + *linked_objects_count += file_path_ids_with_new_object.len() as u64; trace!( existing_objects_to_link = existing_objects_by_cas_id.len(), @@ -164,13 +166,12 @@ impl Task for ObjectProcessor { "Creating new Objects" ); let start = Instant::now(); - 
*file_path_ids_with_new_object = create_objects_and_update_file_paths( - mem::take(file_paths_by_cas_id).into_values().flatten(), - db, - sync, - ) - .await?; + let (more_file_paths_with_new_object, more_linked_objects_count) = + assign_objects_to_duplicated_orphans(file_paths_by_cas_id, db, sync) + .await?; *create_object_time = start.elapsed(); + file_path_ids_with_new_object.extend(more_file_paths_with_new_object); + *linked_objects_count += more_linked_objects_count; *created_objects_count = file_path_ids_with_new_object.len() as u64; @@ -261,7 +262,7 @@ async fn assign_existing_objects_to_file_paths( objects_by_cas_id: &HashMap, db: &PrismaClient, sync: &SyncManager, -) -> Result { +) -> Result, file_identifier::Error> { sync.write_ops( db, objects_by_cas_id @@ -288,10 +289,71 @@ async fn assign_existing_objects_to_file_paths( .unzip::<_, _, Vec<_>, Vec<_>>(), ) .await - .map(|file_paths| file_paths.len() as u64) + .map(|file_paths| { + file_paths + .into_iter() + .map(|file_path_id::Data { id }| id) + .collect() + }) .map_err(Into::into) } +async fn assign_objects_to_duplicated_orphans( + file_paths_by_cas_id: &mut HashMap>, + db: &PrismaClient, + sync: &SyncManager, +) -> Result<(Vec, u64), file_identifier::Error> { + // at least 1 file path per cas_id + let mut selected_file_paths = Vec::with_capacity(file_paths_by_cas_id.len()); + let mut cas_ids_by_file_path_id = HashMap::with_capacity(file_paths_by_cas_id.len()); + + file_paths_by_cas_id.retain(|cas_id, file_paths| { + let file_path = file_paths.pop().expect("file_paths can't be empty"); + let has_more_file_paths = !file_paths.is_empty(); + + if has_more_file_paths { + cas_ids_by_file_path_id.insert(file_path.id, cas_id.clone()); + } + selected_file_paths.push(file_path); + + has_more_file_paths + }); + + let (mut file_paths_with_new_object, objects_by_cas_id) = + create_objects_and_update_file_paths(selected_file_paths, db, sync) + .await? 
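+			// the returned map goes from file_path id to the newly created object's pub_id;
+			// pair each entry back with its cas_id (kept only when that cas_id still has
+			// pending duplicate file_paths) so the remaining ones get linked to the same
+			// object right below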
+ .into_iter() + .map(|(file_path_id, object_pub_id)| { + ( + file_path_id, + cas_ids_by_file_path_id + .remove(&file_path_id) + .map(|cas_id| (cas_id, object_pub_id)), + ) + }) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + let more_file_paths_ids_with_new_object = assign_existing_objects_to_file_paths( + file_paths_by_cas_id, + &objects_by_cas_id.into_iter().flatten().collect(), + db, + sync, + ) + .await?; + + // Sanity check + assert!( + file_paths_by_cas_id.is_empty(), + "We MUST have processed all pending `file_paths` by now" + ); + + let linked_objects_count = more_file_paths_ids_with_new_object.len() as u64; + + file_paths_with_new_object.extend(more_file_paths_ids_with_new_object); + + Ok((file_paths_with_new_object, linked_objects_count)) +} + #[derive(Debug, Serialize, Deserialize)] pub struct SaveState { id: TaskId, diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index 40a6e89f19f8..b4ff7a19c09e 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -307,11 +307,6 @@ impl Indexer { ) -> Result>, indexer::Error> { self.metadata.completed_tasks += 1; - ctx.progress(vec![ProgressUpdate::CompletedTaskCount( - self.metadata.completed_tasks, - )]) - .await; - if any_task_output.is::() { return self .process_walk_output( @@ -454,6 +449,7 @@ impl Indexer { ctx.progress(vec![ ProgressUpdate::TaskCount(self.metadata.total_tasks), + ProgressUpdate::CompletedTaskCount(self.metadata.completed_tasks), ProgressUpdate::message(format!( "Found {to_create_count} new files and {to_update_count} to update" )), @@ -475,7 +471,11 @@ impl Indexer { self.metadata.indexed_count += saved_count; self.metadata.mean_db_write_time += save_duration; - ctx.progress_msg(format!("Saved {saved_count} files")).await; + ctx.progress(vec![ + ProgressUpdate::CompletedTaskCount(self.metadata.completed_tasks), + ProgressUpdate::message(format!("Saved {} files", self.metadata.indexed_count)), + ]) + .await; debug!( "Processed save task in the indexer ({}/{})", @@ -495,8 +495,11 @@ impl Indexer { self.metadata.updated_count += updated_count; self.metadata.mean_db_write_time += update_duration; - ctx.progress_msg(format!("Updated {updated_count} files")) - .await; + ctx.progress(vec![ + ProgressUpdate::CompletedTaskCount(self.metadata.completed_tasks), + ProgressUpdate::message(format!("Updated {} files", self.metadata.updated_count)), + ]) + .await; debug!( "Processed update task in the indexer ({}/{})", diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index cbab3278bdbf..f4eb39436ae9 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -15,6 +15,7 @@ use std::{ path::Path, pin::pin, sync::Arc, + time::Duration, }; use async_channel as chan; @@ -30,6 +31,7 @@ use strum::{Display, EnumString}; use tokio::{ spawn, sync::{watch, Mutex}, + time::Instant, }; use tracing::{debug, error, info, instrument, trace, warn}; use uuid::Uuid; @@ -141,6 +143,7 @@ where Box::new(JobHolder { id, job: self, + run_time: Duration::ZERO, report: ReportBuilder::new(id, J::NAME).build(), next_jobs: VecDeque::new(), _ctx: PhantomData, @@ -304,6 +307,7 @@ where Box::new(JobHolder { id: self.id, job: self.job, + run_time: Duration::ZERO, report: self.report_builder.build(), next_jobs: self.next_jobs, _ctx: self._ctx, @@ -365,12 +369,15 @@ where pub(super) id: JobId, pub(super) job: J, pub(super) report: Report, + 
pub(super) run_time: Duration, pub(super) next_jobs: VecDeque>>, pub(super) _ctx: PhantomData, } pub struct JobHandle> { pub(crate) id: JobId, + pub(crate) start_time: Instant, + pub(crate) run_time: Duration, pub(crate) next_jobs: VecDeque>>, pub(crate) ctx: JobCtx, pub(crate) commands_tx: chan::Sender, @@ -680,6 +687,8 @@ where JobHandle { id: self.id, + start_time: Instant::now(), + run_time: Duration::ZERO, next_jobs: self.next_jobs, ctx, commands_tx, @@ -719,6 +728,8 @@ where JobHandle { id: self.id, next_jobs: self.next_jobs, + start_time: Instant::now(), + run_time: self.run_time, ctx, commands_tx, } diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index f1755f32d204..75b81243aa40 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -19,6 +19,7 @@ use futures_concurrency::{ future::{Join, TryJoin}, stream::Merge, }; +use serde_json::json; use tokio::{ fs, sync::oneshot, @@ -30,7 +31,7 @@ use uuid::Uuid; use super::{ job::{DynJob, JobHandle, JobName, JobOutput, OuterContext, ReturnStatus}, - report, + report::{self, ReportMetadata, ReportOutputMetadata}, store::{StoredJob, StoredJobEntry}, Command, JobId, JobSystemError, SerializedTasks, }; @@ -286,6 +287,16 @@ impl> JobSystemRunner { @@ -322,6 +333,7 @@ impl> JobSystemRunner, } @@ -170,6 +172,7 @@ macro_rules! match_deserialize_job { let StoredJob { id, name, + run_time, serialized_job, } = $stored_job; @@ -187,6 +190,7 @@ macro_rules! match_deserialize_job { Box::new(JobHolder { id, job, + run_time, report: $report, next_jobs: VecDeque::new(), _ctx: PhantomData, diff --git a/core/crates/heavy-lifting/src/lib.rs b/core/crates/heavy-lifting/src/lib.rs index 5898d3b6caba..fbdc268e9ed1 100644 --- a/core/crates/heavy-lifting/src/lib.rs +++ b/core/crates/heavy-lifting/src/lib.rs @@ -28,8 +28,6 @@ #![forbid(deprecated_in_future)] #![allow(clippy::missing_errors_doc, clippy::module_name_repetitions)] -use file_identifier::NonCriticalFileIdentifierError; -use indexer::NonCriticalIndexerError; use sd_prisma::prisma::file_path; use sd_task_system::TaskSystemError; @@ -43,7 +41,7 @@ pub mod job_system; pub mod media_processor; pub mod utils; -use media_processor::{NonCriticalMediaProcessorError, ThumbKey}; +use media_processor::ThumbKey; pub use job_system::{ job::{ @@ -85,11 +83,11 @@ impl From for rspc::Error { pub enum NonCriticalError { // TODO: Add variants as needed #[error(transparent)] - Indexer(#[from] NonCriticalIndexerError), + Indexer(#[from] indexer::NonCriticalIndexerError), #[error(transparent)] - FileIdentifier(#[from] NonCriticalFileIdentifierError), + FileIdentifier(#[from] file_identifier::NonCriticalFileIdentifierError), #[error(transparent)] - MediaProcessor(#[from] NonCriticalMediaProcessorError), + MediaProcessor(#[from] media_processor::NonCriticalMediaProcessorError), } #[repr(i32)] diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs index 314eec128b85..82b722753487 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/exif_media_data.rs @@ -1,6 +1,6 @@ use crate::media_processor::{self, media_data_extractor}; -use prisma_client_rust::QueryError; +use sd_core_prisma_helpers::ObjectPubId; use sd_core_sync::Manager as SyncManager; use sd_file_ext::extensions::{Extension, ImageExtension, 
ALL_IMAGE_EXTENSIONS}; @@ -10,13 +10,13 @@ use sd_prisma::{ prisma_sync, }; use sd_sync::{option_sync_db_entry, OperationFactory}; -use sd_utils::{chain_optional_iter, uuid_to_bytes}; -use uuid::Uuid; +use sd_utils::chain_optional_iter; use std::path::Path; use futures_concurrency::future::TryJoin; use once_cell::sync::Lazy; +use prisma_client_rust::QueryError; use super::from_slice_option_to_option; @@ -104,7 +104,7 @@ pub async fn extract( } pub async fn save( - exif_datas: impl IntoIterator + Send, + exif_datas: impl IntoIterator + Send, db: &PrismaClient, sync: &SyncManager, ) -> Result { @@ -120,7 +120,7 @@ pub async fn save( sync.shared_create( prisma_sync::exif_data::SyncId { object: prisma_sync::object::SyncId { - pub_id: uuid_to_bytes(&object_pub_id), + pub_id: object_pub_id.into(), }, }, sync_params, diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs index 78ebfd1637c6..f969c8993f16 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/ffmpeg_media_data.rs @@ -1,6 +1,7 @@ use crate::media_processor::{self, media_data_extractor}; use sd_core_prisma_helpers::object_with_media_data; + use sd_file_ext::extensions::{ AudioExtension, Extension, VideoExtension, ALL_AUDIO_EXTENSIONS, ALL_VIDEO_EXTENSIONS, }; @@ -188,9 +189,9 @@ async fn create_ffmpeg_data( )), ffmpeg_data::metadata::set( serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegData metadata: {err:#?}"); - err + .map_err(|e| { + error!(?e, "Error reading FFmpegData metadata"); + e }) .ok(), ), @@ -232,9 +233,9 @@ async fn create_ffmpeg_chapters( ffmpeg_data_id, _params: vec![ffmpeg_media_chapter::metadata::set( serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegMediaChapter metadata: {err:#?}"); - err + .map_err(|e| { + error!(?e, "Error reading FFmpegMediaChapter metadata"); + e }) .ok(), )], @@ -252,37 +253,36 @@ async fn create_ffmpeg_programs( programs: Vec, db: &PrismaClient, ) -> Result)>, QueryError> { - let (creates, streams_by_program_id) = - programs - .into_iter() - .map( - |Program { - id: program_id, - name, - metadata, - streams, - }| { - ( - ffmpeg_media_program::CreateUnchecked { - program_id, - ffmpeg_data_id: data_id, - _params: vec![ - ffmpeg_media_program::name::set(name), - ffmpeg_media_program::metadata::set( - serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegMediaProgram metadata: {err:#?}"); - err - }) - .ok(), - ), - ], - }, - (program_id, streams), - ) - }, - ) - .unzip::<_, _, Vec<_>, Vec<_>>(); + let (creates, streams_by_program_id) = programs + .into_iter() + .map( + |Program { + id: program_id, + name, + metadata, + streams, + }| { + ( + ffmpeg_media_program::CreateUnchecked { + program_id, + ffmpeg_data_id: data_id, + _params: vec![ + ffmpeg_media_program::name::set(name), + ffmpeg_media_program::metadata::set( + serde_json::to_vec(&metadata) + .map_err(|e| { + error!(?e, "Error reading FFmpegMediaProgram metadata"); + e + }) + .ok(), + ), + ], + }, + (program_id, streams), + ) + }, + ) + .unzip::<_, _, Vec<_>, Vec<_>>(); db.ffmpeg_media_program() .create_many(creates) @@ -341,9 +341,9 @@ async fn create_ffmpeg_streams( ffmpeg_media_stream::language::set(metadata.language.clone()), ffmpeg_media_stream::metadata::set( serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegMediaStream 
metadata: {err:#?}"); - err + .map_err(|e| { + error!(?e, "Error reading FFmpegMediaStream metadata"); + e }) .ok(), ), diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs index c0763cd02fd6..e79415abf55a 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs @@ -1,16 +1,14 @@ use crate::media_processor::thumbnailer; -use image::{imageops, DynamicImage, GenericImageView}; use sd_file_ext::extensions::{ DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS, }; - -#[cfg(feature = "ffmpeg")] -use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS}; use sd_images::{format_image, scale_dimensions, ConvertibleExtension}; use sd_media_metadata::exif::Orientation; use sd_utils::error::FileIOError; -use webp::Encoder; + +#[cfg(feature = "ffmpeg")] +use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS}; use std::{ ops::Deref, @@ -19,17 +17,19 @@ use std::{ time::Duration, }; +use image::{imageops, DynamicImage, GenericImageView}; use once_cell::sync::Lazy; use serde::{Deserialize, Serialize}; use specta::Type; use tokio::{ fs, io, - sync::Mutex, + sync::{oneshot, Mutex}, task::spawn_blocking, time::{sleep, Instant}, }; -use tracing::{error, trace}; +use tracing::{error, instrument, trace}; use uuid::Uuid; +use webp::Encoder; // Files names constants pub const THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails"; @@ -214,11 +214,13 @@ pub const fn can_generate_thumbnail_for_document(document_extension: DocumentExt matches!(document_extension, Pdf) } +#[derive(Debug)] pub enum GenerationStatus { Generated, Skipped, } +#[instrument(skip(thumbnails_directory, cas_id, should_regenerate, kind))] pub async fn generate_thumbnail( thumbnails_directory: &Path, GenerateThumbnailArgs { @@ -232,7 +234,7 @@ pub async fn generate_thumbnail( Duration, Result<(ThumbKey, GenerationStatus), thumbnailer::NonCriticalThumbnailerError>, ) { - trace!("Generating thumbnail for {}", path.display()); + trace!("Generating thumbnail"); let start = Instant::now(); let mut output_path = match kind { @@ -247,15 +249,13 @@ pub async fn generate_thumbnail( if let Err(e) = fs::metadata(&*output_path).await { if e.kind() != io::ErrorKind::NotFound { error!( - "Failed to check if thumbnail exists, but we will try to generate it anyway: {e:#?}" + ?e, + "Failed to check if thumbnail exists, but we will try to generate it anyway" ); } // Otherwise we good, thumbnail doesn't exist so we can generate it } else if !should_regenerate { - trace!( - "Skipping thumbnail generation for {} because it already exists", - path.display() - ); + trace!("Skipping thumbnail generation because it already exists"); return ( start.elapsed(), Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Skipped)), @@ -264,17 +264,19 @@ pub async fn generate_thumbnail( if let Ok(extension) = ImageExtension::from_str(extension) { if can_generate_thumbnail_for_image(extension) { - trace!("Generating image thumbnail for {}", path.display()); + trace!("Generating image thumbnail"); if let Err(e) = generate_image_thumbnail(&path, &output_path).await { return (start.elapsed(), Err(e)); } + trace!("Generated image thumbnail"); } } else if let Ok(extension) = DocumentExtension::from_str(extension) { - trace!("Generating document thumbnail for {}", path.display()); if can_generate_thumbnail_for_document(extension) { + 
trace!("Generating document thumbnail"); if let Err(e) = generate_image_thumbnail(&path, &output_path).await { return (start.elapsed(), Err(e)); } + trace!("Generating document thumbnail"); } } @@ -284,16 +286,17 @@ pub async fn generate_thumbnail( use sd_file_ext::extensions::VideoExtension; if let Ok(extension) = VideoExtension::from_str(extension) { - trace!("Generating image thumbnail for {}", path.display()); if can_generate_thumbnail_for_video(extension) { + trace!("Generating video thumbnail"); if let Err(e) = generate_video_thumbnail(&path, &output_path).await { return (start.elapsed(), Err(e)); } + trace!("Generated video thumbnail"); } } } - trace!("Generated thumbnail for {}", path.display()); + trace!("Generated thumbnail"); ( start.elapsed(), @@ -301,70 +304,92 @@ pub async fn generate_thumbnail( ) } +fn inner_generate_image_thumbnail( + file_path: PathBuf, +) -> Result, thumbnailer::NonCriticalThumbnailerError> { + let mut img = format_image(&file_path).map_err(|e| { + thumbnailer::NonCriticalThumbnailerError::FormatImage(file_path.clone(), e.to_string()) + })?; + + let (w, h) = img.dimensions(); + + #[allow(clippy::cast_precision_loss)] + let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX); + + // Optionally, resize the existing photo and convert back into DynamicImage + if w != w_scaled && h != h_scaled { + img = DynamicImage::ImageRgba8(imageops::resize( + &img, + w_scaled, + h_scaled, + imageops::FilterType::Triangle, + )); + } + + // this corrects the rotation/flip of the image based on the *available* exif data + // not all images have exif data, so we don't error. we also don't rotate HEIF as that's against the spec + if let Some(orientation) = Orientation::from_path(&file_path) { + if ConvertibleExtension::try_from(file_path.as_ref()) + .expect("we already checked if the image was convertible") + .should_rotate() + { + img = orientation.correct_thumbnail(img); + } + } + + // Create the WebP encoder for the above image + let encoder = Encoder::from_image(&img).map_err(|reason| { + thumbnailer::NonCriticalThumbnailerError::WebPEncoding(file_path, reason.to_string()) + })?; + + // Type `WebPMemory` is !Send, which makes the `Future` in this function `!Send`, + // this make us `deref` to have a `&[u8]` and then `to_owned` to make a `Vec` + // which implies on a unwanted clone... 
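+	// The owned bytes are what ends up being sent back to the async caller through the
+	// oneshot channel set up in `generate_image_thumbnail`, so only a plain `Vec<u8>`
+	// ever crosses back into the async context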
+ Ok(encoder.encode(TARGET_QUALITY).deref().to_owned()) +} + +#[instrument( + skip_all, + fields( + input_path = %file_path.as_ref().display(), + output_path = %output_path.as_ref().display() + ) +)] async fn generate_image_thumbnail( file_path: impl AsRef + Send, output_path: impl AsRef + Send, ) -> Result<(), thumbnailer::NonCriticalThumbnailerError> { let file_path = file_path.as_ref().to_path_buf(); - let webp = spawn_blocking({ + let (tx, rx) = oneshot::channel(); + + // Using channel instead of waiting the JoinHandle as for some reason + // the JoinHandle can take some extra time to complete + let handle = spawn_blocking({ let file_path = file_path.clone(); - move || -> Result<_, thumbnailer::NonCriticalThumbnailerError> { - let mut img = format_image(&file_path).map_err(|e| { - thumbnailer::NonCriticalThumbnailerError::FormatImage( - file_path.clone(), - e.to_string(), - ) - })?; - - let (w, h) = img.dimensions(); - - #[allow(clippy::cast_precision_loss)] - let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX); - - // Optionally, resize the existing photo and convert back into DynamicImage - if w != w_scaled && h != h_scaled { - img = DynamicImage::ImageRgba8(imageops::resize( - &img, - w_scaled, - h_scaled, - imageops::FilterType::Triangle, - )); - } + move || { + // Handling error on receiver side + let _ = tx.send(inner_generate_image_thumbnail(file_path)); + } + }); - // this corrects the rotation/flip of the image based on the *available* exif data - // not all images have exif data, so we don't error. we also don't rotate HEIF as that's against the spec - if let Some(orientation) = Orientation::from_path(&file_path) { - if ConvertibleExtension::try_from(file_path.as_ref()) - .expect("we already checked if the image was convertible") - .should_rotate() - { - img = orientation.correct_thumbnail(img); - } - } + let webp = if let Ok(res) = rx.await { + res? + } else { + error!("Failed to generate thumbnail"); + return Err( + thumbnailer::NonCriticalThumbnailerError::PanicWhileGeneratingThumbnail( + file_path, + handle + .await + .expect_err("as the channel was closed, then the spawned task panicked") + .to_string(), + ), + ); + }; - // Create the WebP encoder for the above image - let encoder = Encoder::from_image(&img).map_err(|reason| { - thumbnailer::NonCriticalThumbnailerError::WebPEncoding( - file_path, - reason.to_string(), - ) - })?; - - // Type `WebPMemory` is !Send, which makes the `Future` in this function `!Send`, - // this make us `deref` to have a `&[u8]` and then `to_owned` to make a `Vec` - // which implies on a unwanted clone... 
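The new generate_image_thumbnail hands the CPU-bound encoding to spawn_blocking and collects the result through a tokio oneshot channel rather than awaiting the JoinHandle directly; a panic then shows up as a closed channel, and the JoinHandle is only awaited afterwards to recover the panic message. A minimal sketch of that pattern under a tokio runtime, with a hypothetical expensive_blocking_work standing in for the WebP encoding:

use tokio::{sync::oneshot, task::spawn_blocking};

// Stand-in for CPU-bound work that must run off the async runtime.
fn expensive_blocking_work(input: u32) -> Result<u32, String> {
    Ok(input * 2)
}

#[tokio::main]
async fn main() {
    let (tx, rx) = oneshot::channel();

    // Run the blocking computation on the blocking thread pool and send the
    // result back through the channel; errors are handled on the receiver side.
    let handle = spawn_blocking(move || {
        let _ = tx.send(expensive_blocking_work(21));
    });

    match rx.await {
        // Normal path: the closure ran to completion and sent its result.
        Ok(res) => println!("got: {res:?}"),
        // The sender was dropped without sending, i.e. the closure panicked;
        // awaiting the JoinHandle then yields the JoinError with the panic payload.
        Err(_) => {
            let join_err = handle
                .await
                .expect_err("channel closed without a value, so the task panicked");
            eprintln!("blocking task panicked: {join_err}");
        }
    }
}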
- Ok(encoder.encode(TARGET_QUALITY).deref().to_owned()) - } - }) - .await - .map_err(|e| { - thumbnailer::NonCriticalThumbnailerError::PanicWhileGeneratingThumbnail( - file_path.clone(), - e.to_string(), - ) - })??; + trace!("Generated thumbnail bytes"); let output_path = output_path.as_ref(); @@ -375,20 +400,29 @@ async fn generate_image_thumbnail( ) })?; } else { - error!( - "Failed to get parent directory of '{}' for sharding parent directory", - output_path.display() - ); + error!("Failed to get parent directory for sharding parent directory"); } - fs::write(output_path, &webp).await.map_err(|e| { + trace!("Created shard directory and writing it to disk"); + + let res = fs::write(output_path, &webp).await.map_err(|e| { thumbnailer::NonCriticalThumbnailerError::SaveThumbnail( file_path, FileIOError::from((output_path, e)).to_string(), ) - }) + }); + + trace!("Wrote thumbnail to disk"); + res } +#[instrument( + skip_all, + fields( + input_path = %file_path.as_ref().display(), + output_path = %output_path.as_ref().display() + ) +)] #[cfg(feature = "ffmpeg")] async fn generate_video_thumbnail( file_path: impl AsRef + Send, @@ -413,7 +447,7 @@ async fn generate_video_thumbnail( }) } -const ONE_SEC: Duration = Duration::from_secs(1); +const HALF_SEC: Duration = Duration::from_millis(500); static LAST_SINGLE_THUMB_GENERATED_LOCK: Lazy> = Lazy::new(|| Mutex::new(Instant::now())); @@ -428,10 +462,10 @@ pub async fn generate_single_thumbnail( let mut last_single_thumb_generated_guard = LAST_SINGLE_THUMB_GENERATED_LOCK.lock().await; let elapsed = Instant::now() - *last_single_thumb_generated_guard; - if elapsed < ONE_SEC { + if elapsed < HALF_SEC { // This will choke up in case someone try to use this method in a loop, otherwise // it will consume all the machine resources like a gluton monster from hell - sleep(ONE_SEC - elapsed).await; + sleep(HALF_SEC - elapsed).await; } let (_duration, res) = generate_thumbnail( diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 2b9e65c1afc1..8a7217a04d6c 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -23,7 +23,7 @@ use sd_task_system::{ use sd_utils::{db::maybe_missing, u64_to_frontend}; use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt, hash::{Hash, Hasher}, mem, @@ -38,10 +38,10 @@ use itertools::Itertools; use prisma_client_rust::{raw, PrismaValue}; use serde::{Deserialize, Serialize}; use serde_json::json; -use tracing::{debug, error, warn}; +use tracing::{debug, error, instrument, trace, warn, Level}; use super::{ - helpers, + get_direct_children_files_by_extensions, helpers, tasks::{ self, media_data_extractor, thumbnailer::{self, NewThumbnailReporter}, @@ -80,22 +80,24 @@ impl fmt::Display for Phase { #[derive(Debug)] pub struct MediaProcessor { + // Received arguments location: Arc, location_path: Arc, sub_path: Option, regenerate_thumbnails: bool, + // Job control total_media_data_extraction_files: u64, total_media_data_extraction_tasks: u64, total_thumbnailer_tasks: u64, total_thumbnailer_files: u64, - phase: Phase, + // Run data metadata: Metadata, - errors: Vec, + // On shutdown data pending_tasks_on_resume: Vec>, tasks_for_shutdown: Vec>>, } @@ -148,6 +150,17 @@ impl Job for MediaProcessor { Ok(()) } + #[instrument( + skip_all, + fields( + location_id = self.location.id, + location_path = ?self.location.path, + sub_path = ?self.sub_path.as_ref().map(|path| 
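The single-thumbnail path throttles itself by keeping the timestamp of the previous run behind a Mutex<Instant> and sleeping off the remainder of the interval before doing any work, which also serializes concurrent callers. A rough sketch of that throttle under a tokio runtime; the interval value and do_work future are placeholders:

use std::time::Duration;

use once_cell::sync::Lazy;
use tokio::{
    sync::Mutex,
    time::{sleep, Instant},
};

const MIN_INTERVAL: Duration = Duration::from_millis(500);

// Timestamp of the last run, shared by every caller of `throttled`.
static LAST_RUN: Lazy<Mutex<Instant>> = Lazy::new(|| Mutex::new(Instant::now()));

async fn throttled(do_work: impl std::future::Future<Output = ()>) {
    // Holding the guard across the awaits also serializes concurrent callers.
    let mut last_run = LAST_RUN.lock().await;

    let elapsed = Instant::now() - *last_run;
    if elapsed < MIN_INTERVAL {
        // Sleep off the remainder so at most one run happens per interval.
        sleep(MIN_INTERVAL - elapsed).await;
    }

    do_work.await;

    *last_run = Instant::now();
}

#[tokio::main]
async fn main() {
    for i in 0..3 {
        throttled(async move { println!("run {i}") }).await;
    }
}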
path.display()), + regenerate_thumbnails = self.regenerate_thumbnails, + ), + ret(level = Level::TRACE), + err, + )] async fn run( mut self, dispatcher: JobTaskDispatcher, @@ -248,18 +261,14 @@ impl MediaProcessor { Ok, )?; - debug!( - "Searching for media files in location {location_id} at directory \"{iso_file_path}\"" - ); - // First we will dispatch all tasks for media data extraction so we have a nice reporting let (total_media_data_extraction_files, task_handles) = dispatch_media_data_extractor_tasks( - job_ctx.db(), - job_ctx.sync(), &iso_file_path, &self.location_path, dispatcher, + job_ctx.db(), + job_ctx.sync(), ) .await?; self.total_media_data_extraction_files = total_media_data_extraction_files; @@ -341,7 +350,7 @@ impl MediaProcessor { } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { - warn!("Task returned an empty output"); + warn!(%task_id, "Task returned an empty output"); } Ok(TaskStatus::Shutdown(task)) => { @@ -360,6 +369,19 @@ impl MediaProcessor { return Some(Ok(ReturnStatus::Canceled)); } + Err(TaskSystemError::TaskTimeout(task_id)) => { + warn!( + %task_id, + "Thumbnailer task timed out, we will keep processing the rest of the tasks" + ); + self.errors.push( + media_processor::NonCriticalMediaProcessorError::Thumbnailer( + media_processor::NonCriticalThumbnailerError::TaskTimeout(task_id), + ) + .into(), + ); + } + Err(e) => { error!("Task System error: {e:#?}"); cancel_pending_tasks(&*pending_running_tasks).await; @@ -391,10 +413,10 @@ impl MediaProcessor { self.metadata.media_data_metrics.extracted += extracted; self.metadata.media_data_metrics.skipped += skipped; - self.metadata.media_data_metrics.db_read_time += db_read_time; - self.metadata.media_data_metrics.filtering_time += filtering_time; - self.metadata.media_data_metrics.extraction_time += extraction_time; - self.metadata.media_data_metrics.db_write_time += db_write_time; + self.metadata.media_data_metrics.mean_db_read_time += db_read_time; + self.metadata.media_data_metrics.mean_filtering_time += filtering_time; + self.metadata.media_data_metrics.mean_extraction_time += extraction_time; + self.metadata.media_data_metrics.mean_db_write_time += db_write_time; self.metadata.media_data_metrics.total_successful_tasks += 1; if !errors.is_empty() { @@ -403,7 +425,7 @@ impl MediaProcessor { } debug!( - "Processed {}/{} media data extraction tasks, took: {:?}", + "Processed ({}/{}) media data extraction tasks, took: {:?}", self.metadata.media_data_metrics.total_successful_tasks, self.total_media_data_extraction_tasks, db_read_time + filtering_time + extraction_time + db_write_time, @@ -445,7 +467,7 @@ impl MediaProcessor { self.metadata.thumbnailer_metrics_acc.generated += generated; self.metadata.thumbnailer_metrics_acc.skipped += skipped; - self.metadata.thumbnailer_metrics_acc.total_time += total_time; + self.metadata.thumbnailer_metrics_acc.mean_total_time += total_time; self.metadata.thumbnailer_metrics_acc.mean_time_acc += mean_time_acc; self.metadata.thumbnailer_metrics_acc.std_dev_acc += std_dev_acc; self.metadata.thumbnailer_metrics_acc.total_successful_tasks += 1; @@ -456,7 +478,7 @@ impl MediaProcessor { } debug!( - "Processed {}/{} thumbnailer tasks, took: {total_time:?}", + "Processed ({}/{}) thumbnailer tasks, took: {total_time:?}", self.metadata.thumbnailer_metrics_acc.total_successful_tasks, self.total_thumbnailer_tasks ); @@ -535,10 +557,10 @@ impl From for Vec { struct MediaExtractorMetrics { extracted: u64, skipped: u64, - db_read_time: Duration, - filtering_time: Duration, - 
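The job loop above downgrades a thumbnailer task timeout to a non-critical error and keeps draining the remaining tasks, while other task-system failures still abort the run. A simplified sketch of that triage, using plain stand-in enums rather than the sd-task-system types:

// Simplified stand-ins for the task-system outcomes handled in the job loop.
#[derive(Debug)]
enum TaskOutcome {
    Done(u64),
    Timeout(u64),
    Fatal(String),
}

#[derive(Debug)]
struct NonCriticalError(String);

fn drain_outcomes(outcomes: Vec<TaskOutcome>) -> Result<(u64, Vec<NonCriticalError>), String> {
    let mut completed = 0;
    let mut non_critical = Vec::new();

    for outcome in outcomes {
        match outcome {
            TaskOutcome::Done(_) => completed += 1,
            // A timed-out task is recorded and the job keeps going.
            TaskOutcome::Timeout(task_id) => {
                non_critical.push(NonCriticalError(format!("task {task_id} timed out")));
            }
            // Anything else is treated as a critical task-system failure.
            TaskOutcome::Fatal(reason) => return Err(reason),
        }
    }

    Ok((completed, non_critical))
}

fn main() {
    let report = drain_outcomes(vec![
        TaskOutcome::Done(1),
        TaskOutcome::Timeout(2),
        TaskOutcome::Done(3),
    ]);
    println!("{report:?}");
}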
extraction_time: Duration, - db_write_time: Duration, + mean_db_read_time: Duration, + mean_filtering_time: Duration, + mean_extraction_time: Duration, + mean_db_write_time: Duration, total_successful_tasks: u64, } @@ -546,7 +568,7 @@ struct MediaExtractorMetrics { struct ThumbnailerMetricsAccumulator { generated: u64, skipped: u64, - total_time: Duration, + mean_total_time: Duration, mean_time_acc: f64, std_dev_acc: f64, total_successful_tasks: u64, @@ -556,7 +578,7 @@ struct ThumbnailerMetricsAccumulator { struct ThumbnailerMetrics { generated: u64, skipped: u64, - total_generation_time: Duration, + mean_total_time: Duration, mean_generation_time: Duration, std_dev: Duration, total_successful_tasks: u64, @@ -567,7 +589,7 @@ impl From for ThumbnailerMetrics { ThumbnailerMetricsAccumulator { generated, skipped, - total_time: total_generation_time, + mean_total_time, mean_time_acc: mean_generation_time_acc, std_dev_acc, total_successful_tasks, @@ -579,166 +601,47 @@ impl From for ThumbnailerMetrics { let total = (generated + skipped) as f64; let mean_generation_time = mean_generation_time_acc / total; - let std_dev = Duration::from_secs_f64( - (mean_generation_time.mul_add(-mean_generation_time, std_dev_acc / total)).sqrt(), - ); + let std_dev = if generated > 1 { + Duration::from_secs_f64( + (mean_generation_time.mul_add(-mean_generation_time, std_dev_acc / total)).sqrt(), + ) + } else { + Duration::ZERO + }; Self { generated, skipped, - total_generation_time, - mean_generation_time: Duration::from_secs_f64(mean_generation_time), + mean_total_time, + mean_generation_time: Duration::from_secs_f64(if generated > 1 { + mean_generation_time + } else { + mean_generation_time_acc + }), std_dev, total_successful_tasks, } } } -#[derive(Serialize, Deserialize)] -struct SaveState { - location: Arc, - location_path: Arc, - sub_path: Option, - regenerate_thumbnails: bool, - - total_media_data_extraction_files: u64, - total_media_data_extraction_tasks: u64, - total_thumbnailer_tasks: u64, - total_thumbnailer_files: u64, - - phase: Phase, - - metadata: Metadata, - - errors: Vec, - - tasks_for_shutdown_bytes: Option, -} - -impl SerializableJob for MediaProcessor { - async fn serialize(self) -> Result>, rmp_serde::encode::Error> { - let Self { - location, - location_path, - sub_path, - regenerate_thumbnails, - total_media_data_extraction_files, - total_media_data_extraction_tasks, - total_thumbnailer_tasks, - total_thumbnailer_files, - phase, - metadata, - errors, - tasks_for_shutdown, - .. 
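The thumbnailer metrics only carry two running sums per task, the sum of generation times and the sum of their squares, and recover the standard deviation at the end through Var(X) = E[X^2] - E[X]^2, which is what the mul_add expression above computes. A standalone sketch of that reduction over plain f64 seconds, assuming a non-empty sample set:

fn mean_and_std_dev(samples: &[f64]) -> (f64, f64) {
    // The two accumulators the task carries around: Σx and Σx².
    let (sum, sum_of_squares) = samples
        .iter()
        .fold((0.0_f64, 0.0_f64), |(s, sq), &x| (s + x, sq + x * x));

    let n = samples.len() as f64;
    let mean = sum / n;

    // Var(X) = E[X²] − E[X]², clamped at zero to absorb rounding error.
    let variance = (sum_of_squares / n - mean * mean).max(0.0);

    (mean, variance.sqrt())
}

fn main() {
    let times = [0.2, 0.4, 0.6];
    let (mean, std_dev) = mean_and_std_dev(&times);
    println!("mean: {mean:.3}s, std dev: {std_dev:.3}s");
}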
- } = self; - - rmp_serde::to_vec_named(&SaveState { - location, - location_path, - sub_path, - regenerate_thumbnails, - total_media_data_extraction_files, - total_media_data_extraction_tasks, - total_thumbnailer_tasks, - total_thumbnailer_files, - phase, - metadata, - tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named( - &tasks_for_shutdown - .into_iter() - .map(|task| async move { - if task.is::() { - task.downcast::() - .expect("just checked") - .serialize() - .await - .map(|bytes| (TaskKind::MediaDataExtractor, bytes)) - } else if task.is::() { - task.downcast::() - .expect("just checked") - .serialize() - .await - .map(|bytes| (TaskKind::Thumbnailer, bytes)) - } else { - unreachable!("Unexpected task type: ") - } - }) - .collect::>() - .try_join() - .await?, - )?)), - errors, - }) - .map(Some) - } - - async fn deserialize( - serialized_job: &[u8], - _: &OuterCtx, - ) -> Result)>, rmp_serde::decode::Error> { - let SaveState { - location, - location_path, - sub_path, - regenerate_thumbnails, - total_media_data_extraction_files, - total_media_data_extraction_tasks, - total_thumbnailer_tasks, - total_thumbnailer_files, - phase, - metadata, - errors, - tasks_for_shutdown_bytes, - } = rmp_serde::from_slice::(serialized_job)?; - - Ok(Some(( - Self { - location, - location_path, - sub_path, - regenerate_thumbnails, - total_media_data_extraction_files, - total_media_data_extraction_tasks, - total_thumbnailer_tasks, - total_thumbnailer_files, - phase, - metadata, - errors, - pending_tasks_on_resume: Vec::new(), - tasks_for_shutdown: Vec::new(), - }, - tasks_for_shutdown_bytes, - ))) - } -} - -impl Hash for MediaProcessor { - fn hash(&self, state: &mut H) { - self.location.id.hash(state); - if let Some(ref sub_path) = self.sub_path { - sub_path.hash(state); - } - } -} - +#[instrument(skip_all, fields(parent_iso_file_path = %parent_iso_file_path.as_ref().display()))] async fn dispatch_media_data_extractor_tasks( - db: &Arc, - sync: &Arc, parent_iso_file_path: &IsolatedFilePathData<'_>, location_path: &Arc, dispatcher: &JobTaskDispatcher, + db: &Arc, + sync: &Arc, ) -> Result<(u64, Vec>), media_processor::Error> { let (extract_exif_file_paths, extract_ffmpeg_file_paths) = ( get_all_children_files_by_extensions( - db, parent_iso_file_path, &helpers::exif_media_data::AVAILABLE_EXTENSIONS, + db, ), get_all_children_files_by_extensions( - db, parent_iso_file_path, &helpers::ffmpeg_media_data::AVAILABLE_EXTENSIONS, + db, ), ) .try_join() @@ -780,19 +683,26 @@ async fn dispatch_media_data_extractor_tasks( ) .collect::>(); + trace!( + tasks_count = tasks.len(), + %files_count, + "Dispatching media data extraction tasks", + ); + Ok((files_count, dispatcher.dispatch_many_boxed(tasks).await)) } async fn get_all_children_files_by_extensions( - db: &PrismaClient, parent_iso_file_path: &IsolatedFilePathData<'_>, extensions: &[Extension], + db: &PrismaClient, ) -> Result, media_processor::Error> { // FIXME: Had to use format! 
macro because PCR doesn't support IN with Vec for SQLite // We have no data coming from the user, so this is sql injection safe - db._query_raw::(raw!( - &format!( - "SELECT + let unique_by_object_id = db + ._query_raw::(raw!( + &format!( + "SELECT file_path.id, file_path.materialized_path, file_path.is_dir, @@ -808,27 +718,30 @@ async fn get_all_children_files_by_extensions( AND file_path.cas_id IS NOT NULL AND LOWER(file_path.extension) IN ({}) AND file_path.materialized_path LIKE {{}} - ORDER BY materialized_path ASC", - // Ordering by materialized_path so we can prioritize processing the first files - // in the above part of the directories tree - extensions - .iter() - .map(|ext| format!("LOWER('{ext}')")) - .collect::>() - .join(",") - ), - PrismaValue::Int(parent_iso_file_path.location_id()), - PrismaValue::String(format!( - "{}%", - parent_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory") + ORDER BY materialized_path ASC, name ASC", + // Ordering by materialized_path so we can prioritize processing the first files + // in the above part of the directories tree + extensions + .iter() + .map(|ext| format!("LOWER('{ext}')")) + .collect::>() + .join(",") + ), + PrismaValue::Int(parent_iso_file_path.location_id()), + PrismaValue::String(format!( + "{}%", + parent_iso_file_path + .materialized_path_for_children() + .expect("sub path iso_file_path must be a directory") + )) )) - )) - .exec() - .await - .map(|raw_files| raw_files.into_iter().map(Into::into).collect()) - .map_err(Into::into) + .exec() + .await? + .into_iter() + .map(|raw_file_path| (raw_file_path.object_id, raw_file_path)) + .collect::>(); + + Ok(unique_by_object_id.into_values().map(Into::into).collect()) } async fn dispatch_thumbnailer_tasks( @@ -846,51 +759,52 @@ async fn dispatch_thumbnailer_tasks( let reporter: Arc = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); - let mut file_paths = get_all_children_files_by_extensions( - db, + let priority_file_paths = get_direct_children_files_by_extensions( parent_iso_file_path, &helpers::thumbnailer::ALL_THUMBNAILABLE_EXTENSIONS, + db, ) .await?; - if file_paths.is_empty() { - return Ok((0, Vec::new())); - } + let priority_file_path_ids = priority_file_paths + .iter() + .map(|file_path| file_path.id) + .collect::>(); - let thumbs_count = file_paths.len() as u64; + let mut file_paths = get_all_children_files_by_extensions( + parent_iso_file_path, + &helpers::thumbnailer::ALL_THUMBNAILABLE_EXTENSIONS, + db, + ) + .await?; - let first_materialized_path = file_paths[0].materialized_path.clone(); + file_paths.retain(|file_path| !priority_file_path_ids.contains(&file_path.id)); - // Only the first materialized_path should be processed with priority as the user must see the thumbnails ASAP - let different_materialized_path_idx = file_paths - .iter() - .position(|file_path| file_path.materialized_path != first_materialized_path); + if priority_file_path_ids.is_empty() && file_paths.is_empty() { + return Ok((0, Vec::new())); + } - // TODO debug why we have more priority tasks than we should + let thumbs_count = (priority_file_paths.len() + file_paths.len()) as u64; - let non_priority_tasks = different_materialized_path_idx - .map(|idx| { - file_paths - .drain(idx..) 
- .chunks(BATCH_SIZE) - .into_iter() - .map(|chunk| { - tasks::Thumbnailer::new_indexed( - Arc::clone(&thumbnails_directory_path), - &chunk.collect::>(), - (location_id, location_path), - library_id, - should_regenerate, - false, - Arc::clone(&reporter), - ) - }) - .map(IntoTask::into_task) - .collect::>() + let priority_tasks = priority_file_paths + .into_iter() + .chunks(BATCH_SIZE) + .into_iter() + .map(|chunk| { + tasks::Thumbnailer::new_indexed( + Arc::clone(&thumbnails_directory_path), + &chunk.collect::>(), + (location_id, location_path), + library_id, + should_regenerate, + true, + Arc::clone(&reporter), + ) }) - .unwrap_or_default(); + .map(IntoTask::into_task) + .collect::>(); - let priority_tasks = file_paths + let non_priority_tasks = file_paths .into_iter() .chunks(BATCH_SIZE) .into_iter() @@ -901,7 +815,7 @@ async fn dispatch_thumbnailer_tasks( (location_id, location_path), library_id, should_regenerate, - true, + false, Arc::clone(&reporter), ) }) @@ -909,9 +823,10 @@ async fn dispatch_thumbnailer_tasks( .collect::>(); debug!( - "Dispatching {thumbs_count} thumbnails to be processed, {} with priority and {} without priority tasks", - priority_tasks.len(), - non_priority_tasks.len() + %thumbs_count, + priority_tasks_count = priority_tasks.len(), + non_priority_tasks_count = non_priority_tasks.len(), + "Dispatching thumbnails to be processed", ); Ok(( @@ -921,3 +836,131 @@ async fn dispatch_thumbnailer_tasks( .await, )) } + +#[derive(Serialize, Deserialize)] +struct SaveState { + location: Arc, + location_path: Arc, + sub_path: Option, + regenerate_thumbnails: bool, + + total_media_data_extraction_files: u64, + total_media_data_extraction_tasks: u64, + total_thumbnailer_tasks: u64, + total_thumbnailer_files: u64, + + phase: Phase, + + metadata: Metadata, + + errors: Vec, + + tasks_for_shutdown_bytes: Option, +} + +impl SerializableJob for MediaProcessor { + async fn serialize(self) -> Result>, rmp_serde::encode::Error> { + let Self { + location, + location_path, + sub_path, + regenerate_thumbnails, + total_media_data_extraction_files, + total_media_data_extraction_tasks, + total_thumbnailer_tasks, + total_thumbnailer_files, + phase, + metadata, + errors, + tasks_for_shutdown, + .. 
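Thumbnail dispatch now treats the direct children of the scanned directory as the priority set and everything deeper in the tree as background work, then slices both sets into fixed-size batches. A small sketch of that split-and-chunk step over plain ids; it uses slice chunks instead of the itertools adapter, and the batch size is illustrative:

use std::collections::HashSet;

const BATCH_SIZE: usize = 10;

#[derive(Debug)]
struct Batch {
    ids: Vec<u64>,
    with_priority: bool,
}

fn split_into_batches(direct_children: Vec<u64>, mut all_children: Vec<u64>) -> Vec<Batch> {
    // Direct children of the scanned directory get priority...
    let priority_ids = direct_children.iter().copied().collect::<HashSet<_>>();

    // ...and are removed from the full listing so no file is processed twice.
    all_children.retain(|id| !priority_ids.contains(id));

    let to_batches = |ids: Vec<u64>, with_priority: bool| {
        ids.chunks(BATCH_SIZE)
            .map(|chunk| Batch {
                ids: chunk.to_vec(),
                with_priority,
            })
            .collect::<Vec<_>>()
    };

    let mut batches = to_batches(direct_children, true);
    batches.extend(to_batches(all_children, false));

    batches
}

fn main() {
    let batches = split_into_batches(vec![1, 2, 3], (1..=25).collect());
    println!("{} batches", batches.len());
}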
+ } = self; + + rmp_serde::to_vec_named(&SaveState { + location, + location_path, + sub_path, + regenerate_thumbnails, + total_media_data_extraction_files, + total_media_data_extraction_tasks, + total_thumbnailer_tasks, + total_thumbnailer_files, + phase, + metadata, + tasks_for_shutdown_bytes: Some(SerializedTasks(rmp_serde::to_vec_named( + &tasks_for_shutdown + .into_iter() + .map(|task| async move { + if task.is::() { + task.downcast::() + .expect("just checked") + .serialize() + .await + .map(|bytes| (TaskKind::MediaDataExtractor, bytes)) + } else if task.is::() { + task.downcast::() + .expect("just checked") + .serialize() + .await + .map(|bytes| (TaskKind::Thumbnailer, bytes)) + } else { + unreachable!("Unexpected task type: ") + } + }) + .collect::>() + .try_join() + .await?, + )?)), + errors, + }) + .map(Some) + } + + async fn deserialize( + serialized_job: &[u8], + _: &OuterCtx, + ) -> Result)>, rmp_serde::decode::Error> { + let SaveState { + location, + location_path, + sub_path, + regenerate_thumbnails, + total_media_data_extraction_files, + total_media_data_extraction_tasks, + total_thumbnailer_tasks, + total_thumbnailer_files, + phase, + metadata, + errors, + tasks_for_shutdown_bytes, + } = rmp_serde::from_slice::(serialized_job)?; + + Ok(Some(( + Self { + location, + location_path, + sub_path, + regenerate_thumbnails, + total_media_data_extraction_files, + total_media_data_extraction_tasks, + total_thumbnailer_tasks, + total_thumbnailer_files, + phase, + metadata, + errors, + pending_tasks_on_resume: Vec::new(), + tasks_for_shutdown: Vec::new(), + }, + tasks_for_shutdown_bytes, + ))) + } +} + +impl Hash for MediaProcessor { + fn hash(&self, state: &mut H) { + self.location.id.hash(state); + if let Some(ref sub_path) = self.sub_path { + sub_path.hash(state); + } + } +} diff --git a/core/crates/heavy-lifting/src/media_processor/mod.rs b/core/crates/heavy-lifting/src/media_processor/mod.rs index 9f480a3f5d3a..c21c2e3dcf33 100644 --- a/core/crates/heavy-lifting/src/media_processor/mod.rs +++ b/core/crates/heavy-lifting/src/media_processor/mod.rs @@ -1,13 +1,15 @@ use crate::{utils::sub_path, OuterContext, UpdateEvent}; -use sd_core_file_path_helper::FilePathError; - +use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData}; use sd_core_prisma_helpers::file_path_for_media_processor; -use sd_prisma::prisma::{file_path, object}; + +use sd_file_ext::extensions::Extension; +use sd_prisma::prisma::{file_path, object, PrismaClient}; use sd_utils::db::MissingFieldError; -use std::fmt; +use std::{collections::HashMap, fmt}; +use prisma_client_rust::{raw, PrismaValue}; use serde::{Deserialize, Serialize}; use specta::Type; @@ -131,3 +133,52 @@ impl From for file_path_for_media_processor::Data } } } + +async fn get_direct_children_files_by_extensions( + parent_iso_file_path: &IsolatedFilePathData<'_>, + extensions: &[Extension], + db: &PrismaClient, +) -> Result, Error> { + // FIXME: Had to use format! 
macro because PCR doesn't support IN with Vec for SQLite + // We have no data coming from the user, so this is sql injection safe + let unique_by_object_id = db + ._query_raw::(raw!( + &format!( + "SELECT + file_path.id, + file_path.materialized_path, + file_path.is_dir, + file_path.name, + file_path.extension, + file_path.cas_id, + object.id as 'object_id', + object.pub_id as 'object_pub_id' + FROM file_path + INNER JOIN object ON object.id = file_path.object_id + WHERE + location_id={{}} + AND cas_id IS NOT NULL + AND LOWER(extension) IN ({}) + AND materialized_path = {{}} + ORDER BY name ASC", + extensions + .iter() + .map(|ext| format!("LOWER('{ext}')")) + .collect::>() + .join(",") + ), + PrismaValue::Int(parent_iso_file_path.location_id()), + PrismaValue::String( + parent_iso_file_path + .materialized_path_for_children() + .expect("sub path iso_file_path must be a directory") + ) + )) + .exec() + .await? + .into_iter() + .map(|raw_file_path| (raw_file_path.object_id, raw_file_path)) + .collect::>(); + + Ok(unique_by_object_id.into_values().map(Into::into).collect()) +} diff --git a/core/crates/heavy-lifting/src/media_processor/shallow.rs b/core/crates/heavy-lifting/src/media_processor/shallow.rs index bdb1515038a4..6f96db0c9602 100644 --- a/core/crates/heavy-lifting/src/media_processor/shallow.rs +++ b/core/crates/heavy-lifting/src/media_processor/shallow.rs @@ -4,10 +4,8 @@ use crate::{ }; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_media_processor; use sd_core_sync::Manager as SyncManager; -use sd_file_ext::extensions::Extension; use sd_prisma::prisma::{location, PrismaClient}; use sd_task_system::{ BaseTaskDispatcher, CancelTaskOnDrop, IntoTask, TaskDispatcher, TaskHandle, TaskOutput, @@ -23,16 +21,16 @@ use std::{ use futures::{stream::FuturesUnordered, StreamExt}; use futures_concurrency::future::TryJoin; use itertools::Itertools; -use prisma_client_rust::{raw, PrismaValue}; use tracing::{debug, warn}; use super::{ + get_direct_children_files_by_extensions, helpers::{self, exif_media_data, ffmpeg_media_data, thumbnailer::THUMBNAIL_CACHE_DIR_NAME}, tasks::{ self, media_data_extractor, thumbnailer::{self, NewThumbnailReporter}, }, - NewThumbnailsReporter, RawFilePathForMediaProcessor, BATCH_SIZE, + NewThumbnailsReporter, BATCH_SIZE, }; #[allow(clippy::missing_panics_doc)] // SAFETY: It doesn't actually panics @@ -163,15 +161,15 @@ async fn dispatch_media_data_extractor_tasks( dispatcher: &BaseTaskDispatcher, ) -> Result>, media_processor::Error> { let (extract_exif_file_paths, extract_ffmpeg_file_paths) = ( - get_files_by_extensions( - db, + get_direct_children_files_by_extensions( parent_iso_file_path, &exif_media_data::AVAILABLE_EXTENSIONS, - ), - get_files_by_extensions( db, + ), + get_direct_children_files_by_extensions( parent_iso_file_path, &ffmpeg_media_data::AVAILABLE_EXTENSIONS, + db, ), ) .try_join() @@ -214,50 +212,6 @@ async fn dispatch_media_data_extractor_tasks( Ok(dispatcher.dispatch_many_boxed(tasks).await) } -async fn get_files_by_extensions( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, - extensions: &[Extension], -) -> Result, media_processor::Error> { - // FIXME: Had to use format! 
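Both query helpers now collapse the raw rows by object id through a HashMap before returning, so an object reachable through several file paths is only submitted for media-data extraction once. A stripped-down sketch of that dedup step, with a hypothetical Row type in place of the Prisma row struct:

use std::collections::HashMap;

#[derive(Debug, Clone)]
struct Row {
    file_path_id: u64,
    object_id: u64,
}

fn unique_by_object_id(rows: Vec<Row>) -> Vec<Row> {
    rows.into_iter()
        // Later rows with the same object_id overwrite earlier ones, so only
        // one file path per object survives.
        .map(|row| (row.object_id, row))
        .collect::<HashMap<_, _>>()
        .into_values()
        .collect()
}

fn main() {
    let rows = vec![
        Row { file_path_id: 1, object_id: 10 },
        Row { file_path_id: 2, object_id: 10 },
        Row { file_path_id: 3, object_id: 11 },
    ];
    println!("{:?}", unique_by_object_id(rows));
}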
macro because PCR doesn't support IN with Vec for SQLite - // We have no data coming from the user, so this is sql injection safe - db._query_raw::(raw!( - &format!( - "SELECT - file_path.id, - file_path.materialized_path, - file_path.is_dir, - file_path.name, - file_path.extension, - file_path.cas_id, - object.id as 'object_id', - object.pub_id as 'object_pub_id' - FROM file_path - INNER JOIN object ON object.id = file_path.object_id - WHERE - location_id={{}} - AND cas_id IS NOT NULL - AND LOWER(extension) IN ({}) - AND materialized_path = {{}}", - extensions - .iter() - .map(|ext| format!("LOWER('{ext}')")) - .collect::>() - .join(",") - ), - PrismaValue::Int(parent_iso_file_path.location_id()), - PrismaValue::String( - parent_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory") - ) - )) - .exec() - .await - .map(|raw_files| raw_files.into_iter().map(Into::into).collect()) - .map_err(Into::into) -} - async fn dispatch_thumbnailer_tasks( parent_iso_file_path: &IsolatedFilePathData<'_>, should_regenerate: bool, @@ -273,10 +227,10 @@ async fn dispatch_thumbnailer_tasks( let reporter: Arc = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); - let file_paths = get_files_by_extensions( - db, + let file_paths = get_direct_children_files_by_extensions( parent_iso_file_path, &helpers::thumbnailer::ALL_THUMBNAILABLE_EXTENSIONS, + db, ) .await?; diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs index 9bd1f5d3de55..b7ec1edbd241 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/media_data_extractor.rs @@ -7,7 +7,7 @@ use crate::{ }; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_media_processor; +use sd_core_prisma_helpers::{file_path_for_media_processor, ObjectPubId}; use sd_core_sync::Manager as SyncManager; use sd_media_metadata::{ExifMetadata, FFmpegMetadata}; @@ -16,8 +16,6 @@ use sd_task_system::{ check_interruption, ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, SerializableTask, Task, TaskId, }; -use sd_utils::from_bytes_to_uuid; -use uuid::Uuid; use std::{ collections::{HashMap, HashSet}, @@ -34,6 +32,17 @@ use futures_concurrency::future::Race; use serde::{Deserialize, Serialize}; use specta::Type; use tokio::time::Instant; +use tracing::{debug, instrument, trace, Level}; + +#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] +pub enum NonCriticalMediaDataExtractorError { + #[error("failed to extract media data from : {1}", .0.display())] + FailedToExtractImageMediaData(PathBuf, String), + #[error("file path missing object id: ")] + FilePathMissingObjectId(file_path::id::Type), + #[error("failed to construct isolated file path data: : {1}")] + FailedToConstructIsolatedFilePathData(file_path::id::Type, String), +} #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] enum Kind { @@ -43,15 +52,24 @@ enum Kind { #[derive(Debug)] pub struct MediaDataExtractor { + // Task control id: TaskId, kind: Kind, + + // Received input args file_paths: Vec, location_id: location::id::Type, location_path: Arc, + + // Inner state stage: Stage, + + // Out collector + output: Output, + + // Dependencies db: Arc, sync: Arc, - output: Output, } #[derive(Debug, Serialize, Deserialize)] @@ -59,87 +77,34 @@ enum Stage { Starting, 
FetchedObjectsAlreadyWithMediaData(Vec), ExtractingMediaData { - paths_by_id: HashMap, - exif_media_datas: Vec<(ExifMetadata, object::id::Type, Uuid)>, + paths_by_id: HashMap, + exif_media_datas: Vec<(ExifMetadata, object::id::Type, ObjectPubId)>, ffmpeg_media_datas: Vec<(FFmpegMetadata, object::id::Type)>, extract_ids_to_remove_from_map: Vec, }, SaveMediaData { - exif_media_datas: Vec<(ExifMetadata, object::id::Type, Uuid)>, + exif_media_datas: Vec<(ExifMetadata, object::id::Type, ObjectPubId)>, ffmpeg_media_datas: Vec<(FFmpegMetadata, object::id::Type)>, }, } -impl MediaDataExtractor { - fn new( - kind: Kind, - file_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: Arc, - db: Arc, - sync: Arc, - ) -> Self { - let mut output = Output::default(); - - Self { - id: TaskId::new_v4(), - kind, - file_paths: file_paths - .iter() - .filter(|file_path| { - if file_path.object.is_some() { - true - } else { - output.errors.push( - media_processor::NonCriticalMediaProcessorError::from( - NonCriticalMediaDataExtractorError::FilePathMissingObjectId( - file_path.id, - ), - ) - .into(), - ); - false - } - }) - .cloned() - .collect(), - location_id, - location_path, - stage: Stage::Starting, - db, - sync, - output, - } - } - - #[must_use] - pub fn new_exif( - file_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: Arc, - db: Arc, - sync: Arc, - ) -> Self { - Self::new(Kind::Exif, file_paths, location_id, location_path, db, sync) - } - - #[must_use] - pub fn new_ffmpeg( - file_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: Arc, - db: Arc, - sync: Arc, - ) -> Self { - Self::new( - Kind::FFmpeg, - file_paths, - location_id, - location_path, - db, - sync, - ) - } +/// [`MediaDataExtractor`] task output +#[derive(Serialize, Deserialize, Default, Debug)] +pub struct Output { + /// How many files were successfully processed + pub extracted: u64, + /// How many files were skipped + pub skipped: u64, + /// Time spent reading data from database + pub db_read_time: Duration, + /// Time spent filtering files to extract media data and files to skip + pub filtering_time: Duration, + /// Time spent extracting media data + pub extraction_time: Duration, + /// Time spent writing media data to database + pub db_write_time: Duration, + /// Errors encountered during the task + pub errors: Vec, } #[async_trait::async_trait] @@ -155,6 +120,19 @@ impl Task for MediaDataExtractor { false } + #[instrument( + skip_all, + fields( + task_id = %self.id, + kind = ?self.kind, + location_id = %self.location_id, + location_path = %self.location_path.display(), + file_paths_count = %self.file_paths.len(), + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above #[allow(clippy::too_many_lines)] async fn run(&mut self, interrupter: &Interrupter) -> Result { loop { @@ -168,17 +146,22 @@ impl Task for MediaDataExtractor { ) .await?; self.output.db_read_time = db_read_start.elapsed(); + trace!( + object_ids_count = object_ids.len(), + "Fetched objects already with media data", + ); self.stage = Stage::FetchedObjectsAlreadyWithMediaData(object_ids); } Stage::FetchedObjectsAlreadyWithMediaData(objects_already_with_media_data) => { - let filtering_start = Instant::now(); if self.file_paths.len() == objects_already_with_media_data.len() { self.output.skipped = self.file_paths.len() as u64; // Files already have media data, 
skipping - + debug!("Skipped all files as they already have media data"); break; } + + let filtering_start = Instant::now(); let paths_by_id = filter_files_to_extract_media_data( mem::take(objects_already_with_media_data), self.location_id, @@ -186,9 +169,13 @@ impl Task for MediaDataExtractor { &mut self.file_paths, &mut self.output, ); - self.output.filtering_time = filtering_start.elapsed(); + trace!( + paths_needing_media_data_extraction_count = paths_by_id.len(), + "Filtered files to extract media data", + ); + self.stage = Stage::ExtractingMediaData { extract_ids_to_remove_from_map: Vec::with_capacity(paths_by_id.len()), exif_media_datas: if self.kind == Kind::Exif { @@ -281,92 +268,74 @@ impl Task for MediaDataExtractor { } } -#[derive(thiserror::Error, Debug, Serialize, Deserialize, Type, Clone)] -pub enum NonCriticalMediaDataExtractorError { - #[error("failed to extract media data from : {1}", .0.display())] - FailedToExtractImageMediaData(PathBuf, String), - #[error("file path missing object id: ")] - FilePathMissingObjectId(file_path::id::Type), - #[error("failed to construct isolated file path data: : {1}")] - FailedToConstructIsolatedFilePathData(file_path::id::Type, String), -} - -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct Output { - pub extracted: u64, - pub skipped: u64, - pub db_read_time: Duration, - pub filtering_time: Duration, - pub extraction_time: Duration, - pub db_write_time: Duration, - pub errors: Vec, -} - -#[derive(Debug, Serialize, Deserialize)] -struct SaveState { - id: TaskId, - kind: Kind, - file_paths: Vec, - location_id: location::id::Type, - location_path: Arc, - stage: Stage, - output: Output, -} - -impl SerializableTask for MediaDataExtractor { - type SerializeError = rmp_serde::encode::Error; - - type DeserializeError = rmp_serde::decode::Error; - - type DeserializeCtx = (Arc, Arc); +impl MediaDataExtractor { + fn new( + kind: Kind, + file_paths: &[file_path_for_media_processor::Data], + location_id: location::id::Type, + location_path: Arc, + db: Arc, + sync: Arc, + ) -> Self { + let mut output = Output::default(); - async fn serialize(self) -> Result, Self::SerializeError> { - let Self { - id, + Self { + id: TaskId::new_v4(), kind, - file_paths, + file_paths: file_paths + .iter() + .filter(|file_path| { + if file_path.object.is_some() { + true + } else { + output.errors.push( + media_processor::NonCriticalMediaProcessorError::from( + NonCriticalMediaDataExtractorError::FilePathMissingObjectId( + file_path.id, + ), + ) + .into(), + ); + false + } + }) + .cloned() + .collect(), location_id, location_path, - stage, + stage: Stage::Starting, + db, + sync, output, - .. 
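The extractor's run method is written as a loop over an explicit Stage enum, with interruption checks between stages, so the task can be paused and later resumed from the stage it had reached rather than starting over. A condensed sketch of that shape, with the task-system interrupter replaced by a plain should_pause closure and toy stage payloads:

use std::mem;

#[derive(Debug)]
enum Stage {
    Starting,
    Extracting { pending: Vec<String> },
    Saving { extracted: Vec<String> },
}

#[derive(Debug)]
enum ExecStatus {
    Done(usize),
    Paused,
}

fn run(stage: &mut Stage, should_pause: impl Fn() -> bool) -> ExecStatus {
    loop {
        // Advance one stage per iteration, moving the old state out of `stage`.
        *stage = match mem::replace(stage, Stage::Starting) {
            Stage::Starting => Stage::Extracting {
                pending: vec!["a.jpg".into(), "b.mp4".into()],
            },
            // Pretend each pending path yielded one piece of media data.
            Stage::Extracting { pending } => Stage::Saving { extracted: pending },
            Stage::Saving { extracted } => return ExecStatus::Done(extracted.len()),
        };

        // The real task checks its Interrupter here; because `stage` already
        // holds the progress made so far, a later call picks up where it left off.
        if should_pause() {
            return ExecStatus::Paused;
        }
    }
}

fn main() {
    let mut stage = Stage::Starting;
    println!("{:?}", run(&mut stage, || false));
}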
- } = self; + } + } - rmp_serde::to_vec_named(&SaveState { - id, - kind, + #[must_use] + pub fn new_exif( + file_paths: &[file_path_for_media_processor::Data], + location_id: location::id::Type, + location_path: Arc, + db: Arc, + sync: Arc, + ) -> Self { + Self::new(Kind::Exif, file_paths, location_id, location_path, db, sync) + } + + #[must_use] + pub fn new_ffmpeg( + file_paths: &[file_path_for_media_processor::Data], + location_id: location::id::Type, + location_path: Arc, + db: Arc, + sync: Arc, + ) -> Self { + Self::new( + Kind::FFmpeg, file_paths, location_id, location_path, - stage, - output, - }) - } - - async fn deserialize( - data: &[u8], - (db, sync): Self::DeserializeCtx, - ) -> Result { - rmp_serde::from_slice(data).map( - |SaveState { - id, - kind, - file_paths, - location_id, - location_path, - stage, - output, - }| Self { - id, - kind, - file_paths, - location_id, - location_path, - stage, - db, - sync, - output, - }, + db, + sync, ) } } @@ -412,7 +381,7 @@ fn filter_files_to_extract_media_data( Output { skipped, errors, .. }: &mut Output, -) -> HashMap { +) -> HashMap { let unique_objects_already_with_media_data = objects_already_with_media_data .into_iter() .collect::>(); @@ -447,7 +416,7 @@ fn filter_files_to_extract_media_data( ( location_path.join(iso_file_path), object.id, - from_bytes_to_uuid(&object.pub_id), + object.pub_id.as_slice().into(), ), ) }) @@ -464,7 +433,7 @@ enum ExtractionOutputKind { struct ExtractionOutput { file_path_id: file_path::id::Type, object_id: object::id::Type, - object_pub_id: Uuid, + object_pub_id: ObjectPubId, kind: ExtractionOutputKind, } @@ -481,7 +450,7 @@ enum InterruptRace { #[inline] fn prepare_extraction_futures<'a>( kind: Kind, - paths_by_id: &'a HashMap, + paths_by_id: &'a HashMap, interrupter: &'a Interrupter, ) -> FuturesUnordered + 'a> { paths_by_id @@ -491,7 +460,7 @@ fn prepare_extraction_futures<'a>( InterruptRace::Processed(ExtractionOutput { file_path_id: *file_path_id, object_id: *object_id, - object_pub_id: *object_pub_id, + object_pub_id: object_pub_id.clone(), kind: match kind { Kind::Exif => { ExtractionOutputKind::Exif(exif_media_data::extract(path).await) @@ -513,6 +482,7 @@ fn prepare_extraction_futures<'a>( .collect::>() } +#[instrument(skip_all, fields(%file_path_id, %object_id))] #[inline] fn process_output( ExtractionOutput { @@ -521,11 +491,13 @@ fn process_output( object_pub_id, kind, }: ExtractionOutput, - exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, Uuid)>, + exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, ObjectPubId)>, ffmpeg_media_datas: &mut Vec<(FFmpegMetadata, object::id::Type)>, extract_ids_to_remove_from_map: &mut Vec, output: &mut Output, ) { + trace!("Processing extracted media data"); + match kind { ExtractionOutputKind::Exif(Ok(Some(exif_data))) => { exif_media_datas.push((exif_data, object_id, object_pub_id)); @@ -548,14 +520,85 @@ fn process_output( #[inline] async fn save( kind: Kind, - exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, Uuid)>, + exif_media_datas: &mut Vec<(ExifMetadata, object::id::Type, ObjectPubId)>, ffmpeg_media_datas: &mut Vec<(FFmpegMetadata, object::id::Type)>, db: &PrismaClient, sync: &SyncManager, ) -> Result { + trace!("Saving media data on database"); + match kind { Kind::Exif => exif_media_data::save(mem::take(exif_media_datas), db, sync).await, Kind::FFmpeg => ffmpeg_media_data::save(mem::take(ffmpeg_media_datas), db).await, } .map_err(Into::into) } + +#[derive(Debug, Serialize, Deserialize)] +struct SaveState { + id: TaskId, 
+ kind: Kind, + file_paths: Vec, + location_id: location::id::Type, + location_path: Arc, + stage: Stage, + output: Output, +} + +impl SerializableTask for MediaDataExtractor { + type SerializeError = rmp_serde::encode::Error; + + type DeserializeError = rmp_serde::decode::Error; + + type DeserializeCtx = (Arc, Arc); + + async fn serialize(self) -> Result, Self::SerializeError> { + let Self { + id, + kind, + file_paths, + location_id, + location_path, + stage, + output, + .. + } = self; + + rmp_serde::to_vec_named(&SaveState { + id, + kind, + file_paths, + location_id, + location_path, + stage, + output, + }) + } + + async fn deserialize( + data: &[u8], + (db, sync): Self::DeserializeCtx, + ) -> Result { + rmp_serde::from_slice(data).map( + |SaveState { + id, + kind, + file_paths, + location_id, + location_path, + stage, + output, + }| Self { + id, + kind, + file_paths, + location_id, + location_path, + stage, + output, + db, + sync, + }, + ) + } +} diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index 73ed518b00f2..c59b82b5e986 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -43,7 +43,7 @@ use futures_concurrency::future::Race; use serde::{Deserialize, Serialize}; use specta::Type; use tokio::time::Instant; -use tracing::{error, trace}; +use tracing::{error, instrument, trace, Level}; use uuid::Uuid; pub type ThumbnailId = u32; @@ -54,15 +54,24 @@ pub trait NewThumbnailReporter: Send + Sync + fmt::Debug + 'static { #[derive(Debug)] pub struct Thumbnailer { + // Task control id: TaskId, - reporter: Arc, + with_priority: bool, + + // Received input args thumbs_kind: ThumbnailKind, thumbnails_directory_path: Arc, thumbnails_to_generate: HashMap, - already_processed_ids: Vec, should_regenerate: bool, - with_priority: bool, + + // Inner state + already_processed_ids: Vec, + + // Out collector output: Output, + + // Dependencies + reporter: Arc, } #[async_trait::async_trait] @@ -79,6 +88,20 @@ impl Task for Thumbnailer { Some(THUMBNAILER_TASK_TIMEOUT) // The entire task must not take more than this constant } + #[instrument( + skip_all, + fields( + task_id = %self.id, + thumbs_kind = ?self.thumbs_kind, + should_regenerate = self.should_regenerate, + thumbnails_to_generate_count = self.thumbnails_to_generate.len(), + already_processed_ids_count = self.already_processed_ids.len(), + with_priority = self.with_priority, + ), + ret(level = Level::TRACE), + err, + )] + #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, interrupter: &Interrupter) -> Result { enum InterruptRace { Interrupted(InterruptionKind), @@ -200,6 +223,8 @@ pub enum NonCriticalThumbnailerError { CreateShardDirectory(String), #[error("failed to save thumbnail : {1}", .0.display())] SaveThumbnail(PathBuf, String), + #[error("task timed out: {0}")] + TaskTimeout(TaskId), } impl Thumbnailer { @@ -325,6 +350,57 @@ impl Thumbnailer { } } +#[instrument(skip_all, fields(thumb_id = id, %generated, %skipped, ?elapsed_time, ?res))] +fn process_thumbnail_generation_output( + (id, (elapsed_time, res)): ThumbnailGenerationOutput, + with_priority: bool, + reporter: &dyn NewThumbnailReporter, + already_processed_ids: &mut Vec, + Output { + generated, + skipped, + errors, + mean_time_acc: mean_generation_time_accumulator, + std_dev_acc: std_dev_accumulator, + .. 
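The serialization split here is deliberate: only the resumable state goes through the codec, while non-serializable dependencies (the database and sync handles) are dropped on serialize and re-injected from the deserialization context when the task is restored. A minimal sketch of that shape, using serde_json in place of the MessagePack codec and a plain DbHandle struct as the stand-in dependency:

use std::sync::Arc;

use serde::{Deserialize, Serialize};

// Stand-in for a handle (database, sync manager, ...) that cannot be serialized.
#[derive(Debug)]
struct DbHandle;

#[derive(Debug)]
struct Task {
    processed: u64,
    pending: Vec<String>,
    db: Arc<DbHandle>,
}

// Only the resumable state is persisted.
#[derive(Serialize, Deserialize)]
struct SaveState {
    processed: u64,
    pending: Vec<String>,
}

impl Task {
    fn serialize(self) -> serde_json::Result<Vec<u8>> {
        let Self { processed, pending, .. } = self; // `db` is intentionally dropped
        serde_json::to_vec(&SaveState { processed, pending })
    }

    fn deserialize(bytes: &[u8], db: Arc<DbHandle>) -> serde_json::Result<Self> {
        let SaveState { processed, pending } = serde_json::from_slice(bytes)?;
        // The dependency comes from the deserialization context, not the bytes.
        Ok(Self { processed, pending, db })
    }
}

fn main() -> serde_json::Result<()> {
    let task = Task { processed: 2, pending: vec!["x.png".into()], db: Arc::new(DbHandle) };
    let bytes = task.serialize()?;
    let restored = Task::deserialize(&bytes, Arc::new(DbHandle))?;
    println!("{restored:?}");
    Ok(())
}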
+ }: &mut Output, +) { + let elapsed_time = elapsed_time.as_secs_f64(); + *mean_generation_time_accumulator += elapsed_time; + *std_dev_accumulator += elapsed_time * elapsed_time; + + match res { + Ok((thumb_key, status)) => { + match status { + GenerationStatus::Generated => { + *generated += 1; + } + GenerationStatus::Skipped => { + *skipped += 1; + } + } + + // This if is REALLY needed, due to the sheer performance of the thumbnailer, + // I restricted to only send events notifying for thumbnails in the current + // opened directory, sending events for the entire location turns into a + // humongous bottleneck in the frontend lol, since it doesn't even knows + // what to do with thumbnails for inner directories lol + // - fogodev + if with_priority { + reporter.new_thumbnail(thumb_key); + } + } + Err(e) => { + errors.push(media_processor::NonCriticalMediaProcessorError::from(e).into()); + *skipped += 1; + } + } + + already_processed_ids.push(id); + + trace!("Thumbnail processed"); +} + #[derive(Debug, Serialize, Deserialize)] struct SaveState { id: TaskId, @@ -406,51 +482,3 @@ type ThumbnailGenerationOutput = ( Result<(ThumbKey, GenerationStatus), NonCriticalThumbnailerError>, ), ); - -fn process_thumbnail_generation_output( - (id, (elapsed_time, res)): ThumbnailGenerationOutput, - with_priority: bool, - reporter: &dyn NewThumbnailReporter, - already_processed_ids: &mut Vec, - Output { - generated, - skipped, - errors, - mean_time_acc: mean_generation_time_accumulator, - std_dev_acc: std_dev_accumulator, - .. - }: &mut Output, -) { - let elapsed_time = elapsed_time.as_secs_f64(); - *mean_generation_time_accumulator += elapsed_time; - *std_dev_accumulator += elapsed_time * elapsed_time; - - match res { - Ok((thumb_key, status)) => { - match status { - GenerationStatus::Generated => { - *generated += 1; - } - GenerationStatus::Skipped => { - *skipped += 1; - } - } - - // This if is REALLY needed, due to the sheer performance of the thumbnailer, - // I restricted to only send events notifying for thumbnails in the current - // opened directory, sending events for the entire location turns into a - // humongous bottleneck in the frontend lol, since it doesn't even knows - // what to do with thumbnails for inner directories lol - // - fogodev - if with_priority { - reporter.new_thumbnail(thumb_key); - } - } - Err(e) => { - errors.push(media_processor::NonCriticalMediaProcessorError::from(e).into()); - *skipped += 1; - } - } - - already_processed_ids.push(id); -} diff --git a/core/crates/heavy-lifting/src/utils/sub_path.rs b/core/crates/heavy-lifting/src/utils/sub_path.rs index f9e607b4186a..91e3b88c7f39 100644 --- a/core/crates/heavy-lifting/src/utils/sub_path.rs +++ b/core/crates/heavy-lifting/src/utils/sub_path.rs @@ -1,4 +1,3 @@ -use rspc::ErrorCode; use sd_core_file_path_helper::{ ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, FilePathError, IsolatedFilePathData, @@ -9,6 +8,7 @@ use sd_prisma::prisma::{location, PrismaClient}; use std::path::{Path, PathBuf}; use prisma_client_rust::QueryError; +use rspc::ErrorCode; #[derive(thiserror::Error, Debug)] pub enum Error { diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 4b141f7d5fce..db30596f9872 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -23,7 +23,7 @@ use sd_core_heavy_lifting::{ ThumbnailKind, }, }; -use sd_core_prisma_helpers::file_path_with_object; +use 
sd_core_prisma_helpers::{file_path_with_object, ObjectPubId}; use sd_file_ext::{ extensions::{AudioExtension, ImageExtension, VideoExtension}, @@ -37,7 +37,7 @@ use sd_sync::OperationFactory; use sd_utils::{ db::{inode_from_db, inode_to_db, maybe_missing}, error::FileIOError, - from_bytes_to_uuid, msgpack, uuid_to_bytes, + msgpack, }; #[cfg(target_family = "unix")] @@ -64,7 +64,6 @@ use tokio::{ time::Instant, }; use tracing::{debug, error, trace, warn}; -use uuid::Uuid; use super::{INode, HUNDRED_MILLIS}; @@ -264,7 +263,7 @@ async fn inner_create_file( } = if let Some(object) = existing_object { object } else { - let pub_id = uuid_to_bytes(&Uuid::new_v4()); + let pub_id: ObjectPubId = ObjectPubId::new(); let date_created: DateTime = DateTime::::from(fs_metadata.created_or_now()).into(); let int_kind = kind as i32; @@ -273,7 +272,7 @@ async fn inner_create_file( ( sync.shared_create( prisma_sync::object::SyncId { - pub_id: pub_id.clone(), + pub_id: pub_id.to_db(), }, [ (object::date_created::NAME, msgpack!(date_created)), @@ -282,7 +281,7 @@ async fn inner_create_file( ), db.object() .create( - pub_id.to_vec(), + pub_id.into(), vec![ object::date_created::set(Some(date_created)), object::kind::set(Some(int_kind)), @@ -355,7 +354,7 @@ async fn inner_create_file( .map_err(|e| error!("Failed to extract media data: {e:#?}")) { exif_media_data::save( - [(exif_data, object_id, from_bytes_to_uuid(&object_pub_id))], + [(exif_data, object_id, object_pub_id.into())], db, sync, ) @@ -618,7 +617,7 @@ async fn inner_update_file( .await?; } } else { - let pub_id = uuid_to_bytes(&Uuid::new_v4()); + let pub_id = ObjectPubId::new(); let date_created: DateTime = DateTime::::from(fs_metadata.created_or_now()).into(); @@ -627,7 +626,7 @@ async fn inner_update_file( ( sync.shared_create( prisma_sync::object::SyncId { - pub_id: pub_id.clone(), + pub_id: pub_id.to_db(), }, [ (object::date_created::NAME, msgpack!(date_created)), @@ -635,7 +634,7 @@ async fn inner_update_file( ], ), db.object().create( - pub_id.to_vec(), + pub_id.to_db(), vec![ object::date_created::set(Some(date_created)), object::kind::set(Some(int_kind)), @@ -653,12 +652,14 @@ async fn inner_update_file( }, file_path::object::NAME, msgpack!(prisma_sync::object::SyncId { - pub_id: pub_id.clone() + pub_id: pub_id.to_db() }), ), db.file_path().update( file_path::pub_id::equals(file_path.pub_id.clone()), - vec![file_path::object::connect(object::pub_id::equals(pub_id))], + vec![file_path::object::connect(object::pub_id::equals( + pub_id.into(), + ))], ), ) .await?; @@ -720,11 +721,7 @@ async fn inner_update_file( .map_err(|e| error!("Failed to extract media data: {e:#?}")) { exif_media_data::save( - [( - exif_data, - object.id, - from_bytes_to_uuid(&object.pub_id), - )], + [(exif_data, object.id, object.pub_id.as_slice().into())], db, sync, ) diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index f3086ca00f8e..688197aa90af 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -205,7 +205,7 @@ impl LocationCreateArgs { // Err(err)?; } - info!("Created location: {:?}", &location.data); + info!("Created location: {:?}", location.data.name); Ok(Some(location.data)) } else { diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 026e2d589c47..ab15e2f28ed4 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -491,7 +491,7 @@ export type NonCriticalMediaDataExtractorError = { FailedToExtractImageMediaData export type NonCriticalMediaProcessorError = { media_data_extractor: 
NonCriticalMediaDataExtractorError } | { thumbnailer: NonCriticalThumbnailerError } -export type NonCriticalThumbnailerError = { MissingCasId: number } | { FailedToExtractIsolatedFilePathData: [number, string] } | { VideoThumbnailGenerationFailed: [string, string] } | { FormatImage: [string, string] } | { WebPEncoding: [string, string] } | { PanicWhileGeneratingThumbnail: [string, string] } | { CreateShardDirectory: string } | { SaveThumbnail: [string, string] } +export type NonCriticalThumbnailerError = { MissingCasId: number } | { FailedToExtractIsolatedFilePathData: [number, string] } | { VideoThumbnailGenerationFailed: [string, string] } | { FormatImage: [string, string] } | { WebPEncoding: [string, string] } | { PanicWhileGeneratingThumbnail: [string, string] } | { CreateShardDirectory: string } | { SaveThumbnail: [string, string] } | { TaskTimeout: string } export type NonIndexedPathItem = { path: string; name: string; extension: string; kind: number; is_dir: boolean; date_created: string; date_modified: string; size_in_bytes_bytes: number[]; hidden: boolean } From 471f88382c930ac4274eb38f25e6868d4afa7b71 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Wed, 29 May 2024 23:08:51 -0300 Subject: [PATCH 23/33] Cleanup --- Cargo.lock | 1 + .../src/file_identifier/cas_id.rs | 6 +- .../heavy-lifting/src/file_identifier/job.rs | 4 +- .../heavy-lifting/src/file_identifier/mod.rs | 10 +- .../src/file_identifier/tasks/identifier.rs | 6 +- .../file_identifier/tasks/object_processor.rs | 30 +- .../media_processor/helpers/thumbnailer.rs | 54 +- .../src/media_processor/tasks/thumbnailer.rs | 19 +- core/crates/prisma-helpers/Cargo.toml | 1 + core/crates/prisma-helpers/src/lib.rs | 64 +- core/src/api/labels.rs | 4 +- core/src/api/nodes.rs | 7 +- core/src/api/search/mod.rs | 50 +- core/src/api/tags.rs | 2 +- core/src/context.rs | 14 +- core/src/lib.rs | 50 +- core/src/library/library.rs | 14 +- core/src/location/indexer/mod.rs | 546 -------- core/src/location/indexer/old_indexer_job.rs | 660 ---------- core/src/location/indexer/old_shallow.rs | 197 --- core/src/location/indexer/old_walk.rs | 1120 ----------------- core/src/location/manager/watcher/android.rs | 4 +- core/src/location/manager/watcher/ios.rs | 2 +- core/src/location/manager/watcher/utils.rs | 19 +- core/src/location/mod.rs | 24 +- core/src/location/non_indexed.rs | 32 +- core/src/node/config.rs | 6 +- core/src/object/cas.rs | 62 - .../object/media/exif_metadata_extractor.rs | 164 --- .../object/media/ffmpeg_metadata_extractor.rs | 660 ---------- core/src/object/media/mod.rs | 294 ----- .../object/media/old_media_processor/job.rs | 679 ---------- .../object/media/old_media_processor/mod.rs | 109 -- .../media/old_media_processor/shallow.rs | 367 ------ .../object/media/old_thumbnail/clean_up.rs | 2 + .../object/media/old_thumbnail/directory.rs | 2 + core/src/object/media/old_thumbnail/mod.rs | 192 --- .../object/media/old_thumbnail/old_actor.rs | 335 ----- .../object/media/old_thumbnail/preferences.rs | 34 - .../src/object/media/old_thumbnail/process.rs | 483 ------- core/src/object/media/old_thumbnail/shard.rs | 13 - core/src/object/media/old_thumbnail/state.rs | 225 ---- core/src/object/media/old_thumbnail/worker.rs | 350 ------ core/src/object/mod.rs | 26 - core/src/object/old_file_identifier/mod.rs | 404 ------ .../old_file_identifier_job.rs | 339 ----- .../src/object/old_file_identifier/shallow.rs | 182 --- core/src/object/old_orphan_remover.rs | 2 + core/src/object/tag/mod.rs | 2 - core/src/old_job/error.rs | 6 - 
core/src/old_job/manager.rs | 11 +- core/src/old_job/mod.rs | 79 +- core/src/old_job/report.rs | 15 - core/src/old_job/worker.rs | 4 +- crates/p2p/crates/block/src/lib.rs | 23 +- crates/sync/src/compressed.rs | 6 + packages/client/src/core.ts | 4 +- 57 files changed, 274 insertions(+), 7746 deletions(-) delete mode 100644 core/src/location/indexer/mod.rs delete mode 100644 core/src/location/indexer/old_indexer_job.rs delete mode 100644 core/src/location/indexer/old_shallow.rs delete mode 100644 core/src/location/indexer/old_walk.rs delete mode 100644 core/src/object/cas.rs delete mode 100644 core/src/object/media/exif_metadata_extractor.rs delete mode 100644 core/src/object/media/ffmpeg_metadata_extractor.rs delete mode 100644 core/src/object/media/mod.rs delete mode 100644 core/src/object/media/old_media_processor/job.rs delete mode 100644 core/src/object/media/old_media_processor/mod.rs delete mode 100644 core/src/object/media/old_media_processor/shallow.rs delete mode 100644 core/src/object/media/old_thumbnail/mod.rs delete mode 100644 core/src/object/media/old_thumbnail/old_actor.rs delete mode 100644 core/src/object/media/old_thumbnail/preferences.rs delete mode 100644 core/src/object/media/old_thumbnail/process.rs delete mode 100644 core/src/object/media/old_thumbnail/shard.rs delete mode 100644 core/src/object/media/old_thumbnail/state.rs delete mode 100644 core/src/object/media/old_thumbnail/worker.rs delete mode 100644 core/src/object/old_file_identifier/mod.rs delete mode 100644 core/src/object/old_file_identifier/old_file_identifier_job.rs delete mode 100644 core/src/object/old_file_identifier/shallow.rs diff --git a/Cargo.lock b/Cargo.lock index 1c1864e753ba..b042dea7fee5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9265,6 +9265,7 @@ dependencies = [ "sd-prisma", "sd-utils", "serde", + "specta", "uuid", ] diff --git a/core/crates/heavy-lifting/src/file_identifier/cas_id.rs b/core/crates/heavy-lifting/src/file_identifier/cas_id.rs index 4321546e721e..7240c3483776 100644 --- a/core/crates/heavy-lifting/src/file_identifier/cas_id.rs +++ b/core/crates/heavy-lifting/src/file_identifier/cas_id.rs @@ -1,3 +1,5 @@ +use sd_core_prisma_helpers::CasId; + use std::path::Path; use blake3::Hasher; @@ -33,7 +35,7 @@ const_assert!(SAMPLE_SIZE > HEADER_OR_FOOTER_SIZE); pub async fn generate_cas_id( path: impl AsRef + Send, size: u64, -) -> Result { +) -> Result, io::Error> { let mut hasher = Hasher::new(); hasher.update(&size.to_le_bytes()); @@ -74,5 +76,5 @@ pub async fn generate_cas_id( hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]); } - Ok(hasher.finalize().to_hex()[..16].to_string()) + Ok(hasher.finalize().to_hex()[..16].to_string().into()) } diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index 874d2f1d2399..beef1650122c 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -80,7 +80,7 @@ pub struct FileIdentifier { sub_path: Option, // Inner state - file_paths_accumulator: HashMap>, + file_paths_accumulator: HashMap, Vec>, file_paths_ids_with_priority: HashSet, // Job control @@ -685,7 +685,7 @@ struct SaveState { location_path: Arc, sub_path: Option, - file_paths_accumulator: HashMap>, + file_paths_accumulator: HashMap, Vec>, file_paths_ids_with_priority: HashSet, phase: Phase, diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index bcdb365e964f..3ff3d33831a7 100644 
--- a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -28,7 +28,7 @@ pub mod job; mod shallow; mod tasks; -use cas_id::generate_cas_id; +pub use cas_id::generate_cas_id; pub use job::FileIdentifier; pub use shallow::shallow; @@ -77,7 +77,7 @@ pub enum NonCriticalFileIdentifierError { #[derive(Debug, Clone)] pub struct FileMetadata { - pub cas_id: Option, + pub cas_id: Option>, pub kind: ObjectKind, pub fs_metadata: Metadata, } @@ -197,7 +197,7 @@ async fn dispatch_object_processor_tasks( with_priority: bool, ) -> Vec> where - Iter: IntoIterator)> + Send, + Iter: IntoIterator, Vec)> + Send, Iter::IntoIter: Send, { let mut current_batch = HashMap::<_, Vec<_>>::new(); @@ -262,8 +262,8 @@ where } fn accumulate_file_paths_by_cas_id( - input: HashMap>, - accumulator: &mut HashMap>, + input: HashMap, Vec>, + accumulator: &mut HashMap, Vec>, ) { for (cas_id, file_paths) in input { match accumulator.entry(cas_id) { diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs index 2eb0b29febcd..9a47d9c48ffe 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/identifier.rs @@ -34,14 +34,14 @@ use super::{create_objects_and_update_file_paths, FilePathToCreateOrLinkObject}; #[derive(Debug, Serialize, Deserialize)] struct IdentifiedFile { file_path: file_path_for_file_identifier::Data, - cas_id: CasId, + cas_id: CasId<'static>, kind: ObjectKind, } impl IdentifiedFile { pub fn new( file_path: file_path_for_file_identifier::Data, - cas_id: impl Into, + cas_id: impl Into>, kind: ObjectKind, ) -> Self { Self { @@ -83,7 +83,7 @@ pub struct Output { /// Files that need to be aggregate between many identifier tasks to be processed by the /// object processor tasks - pub file_paths_by_cas_id: HashMap>, + pub file_paths_by_cas_id: HashMap, Vec>, /// Collected metric about time elapsed extracting metadata from file system pub extract_metadata_time: Duration, diff --git a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs index 601669a9a999..03d13c4c31db 100644 --- a/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs +++ b/core/crates/heavy-lifting/src/file_identifier/tasks/object_processor.rs @@ -25,7 +25,7 @@ pub struct ObjectProcessor { with_priority: bool, // Received input args - file_paths_by_cas_id: HashMap>, + file_paths_by_cas_id: HashMap, Vec>, // Inner state stage: Stage, @@ -42,7 +42,7 @@ pub struct ObjectProcessor { enum Stage { Starting, AssignFilePathsToExistingObjects { - existing_objects_by_cas_id: HashMap, + existing_objects_by_cas_id: HashMap, ObjectPubId>, }, CreateObjects, } @@ -191,7 +191,7 @@ impl Task for ObjectProcessor { impl ObjectProcessor { #[must_use] pub fn new( - file_paths_by_cas_id: HashMap>, + file_paths_by_cas_id: HashMap, Vec>, db: Arc, sync: Arc, with_priority: bool, @@ -213,15 +213,15 @@ impl ObjectProcessor { async fn fetch_existing_objects_by_cas_id<'cas_id, Iter>( cas_ids: Iter, db: &PrismaClient, -) -> Result, file_identifier::Error> +) -> Result, ObjectPubId>, file_identifier::Error> where - Iter: IntoIterator + Send, + Iter: IntoIterator> + Send, Iter::IntoIter: Send, { async fn inner( stringed_cas_ids: Vec, db: &PrismaClient, - ) -> Result, file_identifier::Error> { + ) -> Result, ObjectPubId>, file_identifier::Error> { 
db.object() .find_many(vec![object::file_paths::some(vec![ file_path::cas_id::in_vec(stringed_cas_ids), @@ -237,8 +237,14 @@ where .filter_map(|object_for_file_identifier::Data { pub_id, file_paths }| { file_paths .first() - .and_then(|file_path| file_path.cas_id.as_ref()) - .map(|cas_id| (cas_id.into(), pub_id.into())) + .and_then(|file_path| { + file_path + .cas_id + .as_ref() + .map(CasId::from) + .map(CasId::into_owned) + }) + .map(|cas_id| (cas_id, pub_id.into())) }) .collect() }) @@ -258,8 +264,8 @@ where /// connected to file paths with the same cas_id #[instrument(skip_all, err, fields(identified_files_count = file_paths_by_cas_id.len()))] async fn assign_existing_objects_to_file_paths( - file_paths_by_cas_id: &mut HashMap>, - objects_by_cas_id: &HashMap, + file_paths_by_cas_id: &mut HashMap, Vec>, + objects_by_cas_id: &HashMap, ObjectPubId>, db: &PrismaClient, sync: &SyncManager, ) -> Result, file_identifier::Error> { @@ -299,7 +305,7 @@ async fn assign_existing_objects_to_file_paths( } async fn assign_objects_to_duplicated_orphans( - file_paths_by_cas_id: &mut HashMap>, + file_paths_by_cas_id: &mut HashMap, Vec>, db: &PrismaClient, sync: &SyncManager, ) -> Result<(Vec, u64), file_identifier::Error> { @@ -357,7 +363,7 @@ async fn assign_objects_to_duplicated_orphans( #[derive(Debug, Serialize, Deserialize)] pub struct SaveState { id: TaskId, - file_paths_by_cas_id: HashMap>, + file_paths_by_cas_id: HashMap, Vec>, stage: Stage, output: Output, with_priority: bool, diff --git a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs index e79415abf55a..003ec4a282aa 100644 --- a/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/helpers/thumbnailer.rs @@ -1,5 +1,7 @@ use crate::media_processor::thumbnailer; +use sd_core_prisma_helpers::CasId; + use sd_file_ext::extensions::{ DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS, }; @@ -94,16 +96,16 @@ pub static ALL_THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { #[derive(Debug, Serialize, Deserialize, Type, Clone)] pub struct ThumbKey { pub shard_hex: String, - pub cas_id: String, + pub cas_id: CasId<'static>, pub base_directory_str: String, } impl ThumbKey { #[must_use] - pub fn new(cas_id: &str, kind: &ThumbnailKind) -> Self { + pub fn new(cas_id: CasId<'static>, kind: &ThumbnailKind) -> Self { Self { - shard_hex: get_shard_hex(cas_id).to_string(), - cas_id: cas_id.to_string(), + shard_hex: get_shard_hex(&cas_id).to_string(), + cas_id, base_directory_str: match kind { ThumbnailKind::Ephemeral => String::from(EPHEMERAL_DIR), ThumbnailKind::Indexed(library_id) => library_id.to_string(), @@ -112,19 +114,19 @@ impl ThumbKey { } #[must_use] - pub fn new_indexed(cas_id: &str, library_id: Uuid) -> Self { + pub fn new_indexed(cas_id: CasId<'static>, library_id: Uuid) -> Self { Self { - shard_hex: get_shard_hex(cas_id).to_string(), - cas_id: cas_id.to_string(), + shard_hex: get_shard_hex(&cas_id).to_string(), + cas_id, base_directory_str: library_id.to_string(), } } #[must_use] - pub fn new_ephemeral(cas_id: &str) -> Self { + pub fn new_ephemeral(cas_id: CasId<'static>) -> Self { Self { - shard_hex: get_shard_hex(cas_id).to_string(), - cas_id: cas_id.to_string(), + shard_hex: get_shard_hex(&cas_id).to_string(), + cas_id, base_directory_str: String::from(EPHEMERAL_DIR), } } @@ -137,7 +139,7 @@ pub enum ThumbnailKind { } impl ThumbnailKind { - pub fn 
compute_path(&self, data_directory: impl AsRef, cas_id: &str) -> PathBuf { + pub fn compute_path(&self, data_directory: impl AsRef, cas_id: &CasId<'_>) -> PathBuf { let mut thumb_path = get_thumbnails_directory(data_directory); match self { Self::Ephemeral => thumb_path.push(EPHEMERAL_DIR), @@ -146,7 +148,7 @@ impl ThumbnailKind { } } thumb_path.push(get_shard_hex(cas_id)); - thumb_path.push(cas_id); + thumb_path.push(cas_id.as_str()); thumb_path.set_extension(WEBP_EXTENSION); thumb_path @@ -154,15 +156,15 @@ impl ThumbnailKind { } #[derive(Debug, Serialize, Deserialize)] -pub struct GenerateThumbnailArgs { +pub struct GenerateThumbnailArgs<'cas_id> { pub extension: String, - pub cas_id: String, + pub cas_id: CasId<'cas_id>, pub path: PathBuf, } -impl GenerateThumbnailArgs { +impl<'cas_id> GenerateThumbnailArgs<'cas_id> { #[must_use] - pub const fn new(extension: String, cas_id: String, path: PathBuf) -> Self { + pub const fn new(extension: String, cas_id: CasId<'cas_id>, path: PathBuf) -> Self { Self { extension, cas_id, @@ -182,9 +184,9 @@ impl GenerateThumbnailArgs { /// named 000 to fff. #[inline] #[must_use] -pub fn get_shard_hex(cas_id: &str) -> &str { +pub fn get_shard_hex<'cas_id>(cas_id: &'cas_id CasId<'cas_id>) -> &'cas_id str { // Use the first three characters of the hash as the directory name - &cas_id[0..3] + &cas_id.as_str()[0..3] } #[cfg(feature = "ffmpeg")] @@ -227,7 +229,7 @@ pub async fn generate_thumbnail( extension, cas_id, path, - }: &GenerateThumbnailArgs, + }: &GenerateThumbnailArgs<'_>, kind: &ThumbnailKind, should_regenerate: bool, ) -> ( @@ -243,7 +245,7 @@ pub async fn generate_thumbnail( }; output_path.push(get_shard_hex(cas_id)); - output_path.push(cas_id); + output_path.push(cas_id.as_str()); output_path.set_extension(WEBP_EXTENSION); if let Err(e) = fs::metadata(&*output_path).await { @@ -258,7 +260,10 @@ pub async fn generate_thumbnail( trace!("Skipping thumbnail generation because it already exists"); return ( start.elapsed(), - Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Skipped)), + Ok(( + ThumbKey::new(cas_id.to_owned(), kind), + GenerationStatus::Skipped, + )), ); } @@ -300,7 +305,10 @@ pub async fn generate_thumbnail( ( start.elapsed(), - Ok((ThumbKey::new(cas_id, kind), GenerationStatus::Generated)), + Ok(( + ThumbKey::new(cas_id.to_owned(), kind), + GenerationStatus::Generated, + )), ) } @@ -455,7 +463,7 @@ static LAST_SINGLE_THUMB_GENERATED_LOCK: Lazy> = pub async fn generate_single_thumbnail( thumbnails_directory: impl AsRef + Send, extension: String, - cas_id: String, + cas_id: CasId<'static>, path: impl AsRef + Send, kind: ThumbnailKind, ) -> Result<(), thumbnailer::NonCriticalThumbnailerError> { diff --git a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs index c59b82b5e986..22f3fc385a1d 100644 --- a/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs +++ b/core/crates/heavy-lifting/src/media_processor/tasks/thumbnailer.rs @@ -20,7 +20,7 @@ use crate::{ }; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_media_processor; +use sd_core_prisma_helpers::{file_path_for_media_processor, CasId}; use sd_prisma::prisma::{file_path, location}; use sd_task_system::{ @@ -61,7 +61,7 @@ pub struct Thumbnailer { // Received input args thumbs_kind: ThumbnailKind, thumbnails_directory_path: Arc, - thumbnails_to_generate: HashMap, + thumbnails_to_generate: HashMap>, should_regenerate: bool, // Inner state @@ 
-231,7 +231,7 @@ impl Thumbnailer { fn new( thumbs_kind: ThumbnailKind, thumbnails_directory_path: Arc, - thumbnails_to_generate: HashMap, + thumbnails_to_generate: HashMap>, errors: Vec, should_regenerate: bool, with_priority: bool, @@ -256,7 +256,7 @@ impl Thumbnailer { #[must_use] pub fn new_ephemeral( thumbnails_directory_path: Arc, - thumbnails_to_generate: Vec, + thumbnails_to_generate: Vec>, reporter: Arc, ) -> Self { Self::new( @@ -299,7 +299,12 @@ impl Thumbnailer { file_paths .iter() .filter_map(|file_path| { - if let Some(cas_id) = file_path.cas_id.as_ref() { + if let Some(cas_id) = file_path + .cas_id + .as_ref() + .map(CasId::from) + .map(CasId::into_owned) + { let file_path_id = file_path.id; IsolatedFilePathData::try_from((location_id, file_path)) .map_err(|e| { @@ -335,7 +340,7 @@ impl Thumbnailer { file_path_id as u32, GenerateThumbnailArgs::new( iso_file_path.extension().to_string(), - cas_id.clone(), + cas_id, full_path, ), ) @@ -406,7 +411,7 @@ struct SaveState { id: TaskId, thumbs_kind: ThumbnailKind, thumbnails_directory_path: Arc, - thumbnails_to_generate: HashMap, + thumbnails_to_generate: HashMap>, should_regenerate: bool, with_priority: bool, output: Output, diff --git a/core/crates/prisma-helpers/Cargo.toml b/core/crates/prisma-helpers/Cargo.toml index 2d28f7765fd8..7561cecba5a9 100644 --- a/core/crates/prisma-helpers/Cargo.toml +++ b/core/crates/prisma-helpers/Cargo.toml @@ -15,4 +15,5 @@ sd-utils = { path = "../../../crates/utils" } prisma-client-rust = { workspace = true } serde = { workspace = true, features = ["derive"] } +specta = { workspace = true } uuid = { workspace = true, features = ["v4", "serde"] } diff --git a/core/crates/prisma-helpers/src/lib.rs b/core/crates/prisma-helpers/src/lib.rs index 6ac6d97304da..e225c2ac216d 100644 --- a/core/crates/prisma-helpers/src/lib.rs +++ b/core/crates/prisma-helpers/src/lib.rs @@ -31,9 +31,10 @@ use sd_prisma::prisma::{file_path, job, label, location, object}; use sd_utils::{from_bytes_to_uuid, uuid_to_bytes}; -use std::fmt; +use std::{borrow::Cow, fmt}; use serde::{Deserialize, Serialize}; +use specta::Type; use uuid::Uuid; // File Path selectables! 
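// Aside (illustration, not patch code): the hunk below reworks `CasId` around
// `Cow<'_, str>` so callers can borrow ids straight from database rows and only
// allocate when an id has to outlive its row. A self-contained sketch of that
// borrow-then-own pattern, with `Id` standing in for the real `CasId`:
use std::{borrow::Cow, collections::HashMap};

#[derive(Debug, Hash, PartialEq, Eq)]
struct Id<'a>(Cow<'a, str>);

impl<'a> Id<'a> {
	fn borrowed(s: &'a str) -> Self {
		Self(Cow::Borrowed(s))
	}

	fn into_owned(self) -> Id<'static> {
		Id(Cow::Owned(self.0.into_owned()))
	}
}

fn index_rows(rows: &[String]) -> HashMap<Id<'static>, usize> {
	let mut lengths = HashMap::new();
	for row in rows {
		// Borrowing is free while we only inspect the row...
		let id = Id::borrowed(row);
		// ...and we pay for an allocation only when the key is kept around.
		lengths.insert(id.into_owned(), row.len());
	}
	lengths
}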
@@ -299,49 +300,66 @@ label::include!((take: i64) => label_with_objects { } }); -#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Clone)] +#[derive(Debug, Serialize, Deserialize, Hash, PartialEq, Eq, Type)] #[serde(transparent)] -pub struct CasId(String); +pub struct CasId<'cas_id>(Cow<'cas_id, str>); + +impl Clone for CasId<'_> { + fn clone(&self) -> CasId<'static> { + CasId(Cow::Owned(self.0.clone().into_owned())) + } +} + +impl<'cas_id> CasId<'cas_id> { + #[must_use] + pub fn as_str(&self) -> &str { + self.0.as_ref() + } + + #[must_use] + pub fn to_owned(&self) -> CasId<'static> { + CasId(Cow::Owned(self.0.clone().into_owned())) + } -impl From for file_path::cas_id::Type { - fn from(CasId(cas_id): CasId) -> Self { - Some(cas_id) + #[must_use] + pub fn into_owned(self) -> CasId<'static> { + CasId(Cow::Owned(self.0.clone().into_owned())) } } -impl From<&CasId> for file_path::cas_id::Type { - fn from(CasId(cas_id): &CasId) -> Self { - Some(cas_id.clone()) +impl From<&CasId<'_>> for file_path::cas_id::Type { + fn from(CasId(cas_id): &CasId<'_>) -> Self { + Some(cas_id.clone().into_owned()) } } -impl From<&str> for CasId { - fn from(cas_id: &str) -> Self { - Self(cas_id.to_string()) +impl<'cas_id> From<&'cas_id str> for CasId<'cas_id> { + fn from(cas_id: &'cas_id str) -> Self { + Self(Cow::Borrowed(cas_id)) } } -impl From<&String> for CasId { - fn from(cas_id: &String) -> Self { - Self(cas_id.clone()) +impl<'cas_id> From<&'cas_id String> for CasId<'cas_id> { + fn from(cas_id: &'cas_id String) -> Self { + Self(Cow::Borrowed(cas_id)) } } -impl From for CasId { +impl From for CasId<'static> { fn from(cas_id: String) -> Self { - Self(cas_id) + Self(cas_id.into()) } } -impl From for String { - fn from(CasId(cas_id): CasId) -> Self { - cas_id +impl From> for String { + fn from(CasId(cas_id): CasId<'_>) -> Self { + cas_id.into_owned() } } -impl From<&CasId> for String { - fn from(CasId(cas_id): &CasId) -> Self { - cas_id.clone() +impl From<&CasId<'_>> for String { + fn from(CasId(cas_id): &CasId<'_>) -> Self { + cas_id.clone().into_owned() } } diff --git a/core/src/api/labels.rs b/core/src/api/labels.rs index 18ee29a96b6b..9aaaf30e3d6a 100644 --- a/core/src/api/labels.rs +++ b/core/src/api/labels.rs @@ -1,7 +1,7 @@ use crate::{invalidate_query, library::Library}; use sd_core_heavy_lifting::media_processor::ThumbKey; -use sd_core_prisma_helpers::label_with_objects; +use sd_core_prisma_helpers::{label_with_objects, CasId}; use sd_prisma::{ prisma::{label, label_on_object, object, SortOrder}, @@ -48,6 +48,8 @@ pub(crate) fn mount() -> AlphaRouter { file_path_data .cas_id .as_ref() + .map(CasId::from) + .map(CasId::into_owned) .map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)) }) // Filter out None values and transform each element to Vec> .collect::>(), // Collect into Vec>> diff --git a/core/src/api/nodes.rs b/core/src/api/nodes.rs index 00b42781c4ff..b518490694c6 100644 --- a/core/src/api/nodes.rs +++ b/core/src/api/nodes.rs @@ -179,12 +179,7 @@ pub(crate) fn mount() -> AlphaRouter { |node, UpdateThumbnailerPreferences { .. 
}: UpdateThumbnailerPreferences| async move { node.config .update_preferences(|_| { - // TODO(fogodev): remove this crap - // preferences - // .thumbnailer - // .set_background_processing_percentage( - // background_processing_percentage, - // ); + // TODO(fogodev): introduce configurable workers count to task system }) .await .map_err(|e| { diff --git a/core/src/api/search/mod.rs b/core/src/api/search/mod.rs index 4781c450c4e3..d4d2a29fba70 100644 --- a/core/src/api/search/mod.rs +++ b/core/src/api/search/mod.rs @@ -7,7 +7,7 @@ use crate::{ use prisma_client_rust::Operator; use sd_core_heavy_lifting::media_processor::ThumbKey; -use sd_core_prisma_helpers::{file_path_for_frontend, object_with_file_paths}; +use sd_core_prisma_helpers::{file_path_for_frontend, object_with_file_paths, CasId}; use sd_prisma::prisma::{self, PrismaClient}; use std::path::PathBuf; @@ -217,21 +217,23 @@ pub fn mount() -> AlphaRouter { let mut items = Vec::with_capacity(file_paths.len()); for file_path in file_paths { - let has_created_thumbnail = if let Some(cas_id) = &file_path.cas_id { - library - .thumbnail_exists(&node, cas_id) - .await - .map_err(LocationError::from)? - } else { - false - }; + let has_created_thumbnail = + if let Some(cas_id) = file_path.cas_id.as_ref().map(CasId::from) { + library + .thumbnail_exists(&node, &cas_id) + .await + .map_err(LocationError::from)? + } else { + false + }; items.push(ExplorerItem::Path { thumbnail: file_path .cas_id .as_ref() - // .filter(|_| thumbnail_exists_locally) - .map(|i| ThumbKey::new_indexed(i, library.id)), + .map(CasId::from) + .map(CasId::into_owned) + .map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)), has_created_thumbnail, item: Box::new(file_path), }) @@ -332,23 +334,27 @@ pub fn mount() -> AlphaRouter { .file_paths .iter() .map(|fp| fp.cas_id.as_ref()) - .find_map(|c| c); - - let has_created_thumbnail = if let Some(cas_id) = cas_id { - library.thumbnail_exists(&node, cas_id).await.map_err(|e| { - rspc::Error::with_cause( - ErrorCode::InternalServerError, - "Failed to check that thumbnail exists".to_string(), - e, - ) - })? + .find_map(|c| c) + .map(CasId::from) + .map(|cas_id| cas_id.to_owned()); + + let has_created_thumbnail = if let Some(cas_id) = &cas_id { + library + .thumbnail_exists(&node, cas_id) + .await + .map_err(|e| { + rspc::Error::with_cause( + ErrorCode::InternalServerError, + "Failed to check that thumbnail exists".to_string(), + e, + ) + })? 
} else { false }; items.push(ExplorerItem::Object { thumbnail: cas_id - // .filter(|_| thumbnail_exists_locally) .map(|cas_id| ThumbKey::new_indexed(cas_id, library.id)), item: object, has_created_thumbnail, diff --git a/core/src/api/tags.rs b/core/src/api/tags.rs index 0af6f6d8761e..b951368f2b89 100644 --- a/core/src/api/tags.rs +++ b/core/src/api/tags.rs @@ -4,7 +4,7 @@ use sd_prisma::{ prisma::{file_path, object, tag, tag_on_object}, prisma_sync, }; -use sd_sync::{option_sync_db_entry, option_sync_entry, sync_entry, OperationFactory}; +use sd_sync::{option_sync_db_entry, OperationFactory}; use sd_utils::{msgpack, uuid_to_bytes}; use std::collections::BTreeMap; diff --git a/core/src/context.rs b/core/src/context.rs index 995e09ac4af8..c308d3c5cf04 100644 --- a/core/src/context.rs +++ b/core/src/context.rs @@ -147,13 +147,13 @@ impl sd_core_heavy_lifting::JobContext< } ProgressUpdate::Message(message) => { - trace!("job {} message: {}", report.id, message); + trace!(job_id = %report.id, %message, "job message"); report.message = message; } ProgressUpdate::Phase(phase) => { trace!( - "changing Job phase: {} -> {phase}", - report.id, + job_id = %report.id, + "changing phase: {} -> {phase}", report.phase ); report.phase = phase; @@ -168,7 +168,10 @@ impl sd_core_heavy_lifting::JobContext< let task_count = report.task_count as usize; let completed_task_count = report.completed_task_count as usize; let remaining_task_count = task_count.saturating_sub(completed_task_count); - let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32; // Adding 1 to avoid division by zero + + // Adding 1 to avoid division by zero + let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32; + let remaining_time = remaining_time_per_task * remaining_task_count as i32; // Update the report with estimated remaining time @@ -189,7 +192,8 @@ impl sd_core_heavy_lifting::JobContext< async move { if let Err(e) = report.update(&db).await { error!( - "Failed to update job report on debounced job progress event: {e:#?}" + ?e, + "Failed to update job report on debounced job progress event" ); } } diff --git a/core/src/lib.rs b/core/src/lib.rs index f5d6c532f481..68910b61d613 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -4,11 +4,10 @@ use crate::{ api::{CoreEvent, Router}, location::LocationManagerError, - // object::media::old_thumbnail::old_actor::OldThumbnailer, }; -use futures_concurrency::future::Join; -use sd_core_heavy_lifting::JobSystem; +use sd_core_heavy_lifting::{media_processor::ThumbnailKind, JobSystem}; +use sd_core_prisma_helpers::CasId; #[cfg(feature = "ai")] use sd_ai::old_image_labeler::{DownloadModelError, OldImageLabeler, YoloV8}; @@ -16,18 +15,15 @@ use sd_ai::old_image_labeler::{DownloadModelError, OldImageLabeler, YoloV8}; use sd_task_system::TaskSystem; use sd_utils::error::FileIOError; -use api::notifications::{Notification, NotificationData, NotificationId}; -use chrono::{DateTime, Utc}; -use node::config; -use notifications::Notifications; -use reqwest::{RequestBuilder, Response}; - use std::{ fmt, path::{Path, PathBuf}, sync::{atomic::AtomicBool, Arc}, }; +use chrono::{DateTime, Utc}; +use futures_concurrency::future::Join; +use reqwest::{RequestBuilder, Response}; use thiserror::Error; use tokio::{fs, io, sync::broadcast}; use tracing::{error, info, warn}; @@ -58,8 +54,10 @@ pub(crate) mod volume; pub use env::Env; +use api::notifications::{Notification, NotificationData, NotificationId}; use context::{JobContext, NodeContext}; -use 
object::media::get_ephemeral_thumbnail_path; +use node::config; +use notifications::Notifications; pub(crate) use sd_core_sync as sync; @@ -74,7 +72,6 @@ pub struct Node { pub p2p: Arc, pub event_bus: (broadcast::Sender, broadcast::Receiver), pub notifications: Notifications, - // pub thumbnailer: OldThumbnailer, pub cloud_sync_flag: Arc, pub env: Arc, pub http: reqwest::Client, @@ -141,13 +138,6 @@ impl Node { locations, notifications: notifications::Notifications::new(), p2p, - // thumbnailer: OldThumbnailer::new( - // data_dir, - // libraries.clone(), - // event_bus.0.clone(), - // config.preferences_watcher(), - // ) - // .await, config, event_bus, libraries, @@ -250,7 +240,15 @@ impl Node { std::env::set_var( "RUST_LOG", - format!("info,sd_core={level},sd_p2p=debug,sd_core::location::manager=info,sd_ai={level}"), + format!( + "info,\ + sd_core={level},\ + sd_p2p=debug,\ + sd_core::location::manager=info,\ + sd_core_heavy_lifting=debug,\ + sd_task_system=debug,\ + sd_ai={level}" + ), ); } @@ -310,12 +308,16 @@ impl Node { pub(crate) fn emit(&self, event: CoreEvent) { if let Err(e) = self.event_bus.0.send(event) { - warn!("Error sending event to event bus: {e:?}"); + warn!(?e, "Error sending event to event bus"); } } - pub async fn ephemeral_thumbnail_exists(&self, cas_id: &str) -> Result { - let thumb_path = get_ephemeral_thumbnail_path(self, cas_id); + pub async fn ephemeral_thumbnail_exists( + &self, + cas_id: &CasId<'_>, + ) -> Result { + let thumb_path = + ThumbnailKind::Ephemeral.compute_path(self.config.data_directory(), cas_id); match fs::metadata(&thumb_path).await { Ok(_) => Ok(true), @@ -340,8 +342,8 @@ impl Node { Ok(_) => { self.notifications._internal_send(notification); } - Err(err) => { - error!("Error saving notification to config: {:?}", err); + Err(e) => { + error!(?e, "Error saving notification to config"); } } } diff --git a/core/src/library/library.rs b/core/src/library/library.rs index 841b9f26e1e5..98cc89c1731e 100644 --- a/core/src/library/library.rs +++ b/core/src/library/library.rs @@ -1,7 +1,8 @@ -use crate::{api::CoreEvent, cloud, object::media::get_indexed_thumbnail_path, sync, Node}; +use crate::{api::CoreEvent, cloud, sync, Node}; use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_to_full_path; +use sd_core_heavy_lifting::media_processor::ThumbnailKind; +use sd_core_prisma_helpers::{file_path_to_full_path, CasId}; use sd_p2p::Identity; use sd_prisma::prisma::{file_path, location, PrismaClient}; @@ -123,8 +124,13 @@ impl Library { } } - pub async fn thumbnail_exists(&self, node: &Node, cas_id: &str) -> Result { - let thumb_path = get_indexed_thumbnail_path(node, cas_id, self.id); + pub async fn thumbnail_exists( + &self, + node: &Node, + cas_id: &CasId<'_>, + ) -> Result { + let thumb_path = + ThumbnailKind::Indexed(self.id).compute_path(node.config.data_directory(), cas_id); match fs::metadata(&thumb_path).await { Ok(_) => Ok(true), diff --git a/core/src/location/indexer/mod.rs b/core/src/location/indexer/mod.rs deleted file mode 100644 index a469cec98cfd..000000000000 --- a/core/src/location/indexer/mod.rs +++ /dev/null @@ -1,546 +0,0 @@ -use crate::library::Library; - -use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData, IsolatedFilePathDataParts}; -use sd_core_indexer_rules::IndexerRuleError; -use sd_core_prisma_helpers::file_path_pub_and_cas_ids; - -use sd_prisma::{ - prisma::{file_path, location, PrismaClient}, - prisma_sync, -}; -use sd_sync::*; -use sd_utils::{db::inode_to_db, 
error::FileIOError, from_bytes_to_uuid, msgpack}; - -use std::{collections::HashMap, path::Path}; - -use chrono::Utc; -use futures_concurrency::future::TryJoin; -use itertools::Itertools; -use prisma_client_rust::operator::or; -use rspc::ErrorCode; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tracing::{trace, warn}; - -use super::location_with_indexer_rules; - -pub mod old_indexer_job; -mod old_shallow; -mod old_walk; - -use old_walk::WalkedEntry; - -// pub use old_indexer_job::OldIndexerJobInit; -// pub use old_shallow::*; - -#[derive(Serialize, Deserialize, Debug)] -pub struct OldIndexerJobSaveStep { - chunk_idx: usize, - walked: Vec, -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct OldIndexerJobUpdateStep { - chunk_idx: usize, - to_update: Vec, -} - -/// Error type for the indexer module -#[derive(Error, Debug)] -pub enum IndexerError { - // Not Found errors - #[error("indexer rule not found: ")] - IndexerRuleNotFound(i32), - #[error("received sub path not in database: ", .0.display())] - SubPathNotFound(Box), - - // Internal Errors - #[error("Database Error: {}", .0.to_string())] - Database(#[from] prisma_client_rust::QueryError), - #[error(transparent)] - FileIO(#[from] FileIOError), - #[error(transparent)] - FilePath(#[from] FilePathError), - - // Mixed errors - #[error(transparent)] - IndexerRules(#[from] IndexerRuleError), -} - -impl From for rspc::Error { - fn from(err: IndexerError) -> Self { - match err { - IndexerError::IndexerRuleNotFound(_) | IndexerError::SubPathNotFound(_) => { - rspc::Error::with_cause(ErrorCode::NotFound, err.to_string(), err) - } - - IndexerError::IndexerRules(rule_err) => rule_err.into(), - - _ => rspc::Error::with_cause(ErrorCode::InternalServerError, err.to_string(), err), - } - } -} - -async fn execute_indexer_save_step( - location: &location_with_indexer_rules::Data, - OldIndexerJobSaveStep { walked, .. }: &OldIndexerJobSaveStep, - library: &Library, -) -> Result { - let Library { sync, db, .. } = library; - - let (sync_stuff, paths): (Vec<_>, Vec<_>) = walked - .iter() - .map(|entry| { - let IsolatedFilePathDataParts { - materialized_path, - is_dir, - name, - extension, - .. 
- } = &entry.iso_file_path.to_parts(); - - use file_path::*; - - let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); - - let (sync_params, db_params): (Vec<_>, Vec<_>) = [ - ( - ( - location::NAME, - msgpack!(prisma_sync::location::SyncId { - pub_id: location.pub_id.clone() - }), - ), - location_id::set(Some(location.id)), - ), - sync_db_entry!(materialized_path.to_string(), materialized_path), - sync_db_entry!(name.to_string(), name), - sync_db_entry!(*is_dir, is_dir), - sync_db_entry!(extension.to_string(), extension), - sync_db_entry!( - entry.metadata.size_in_bytes.to_be_bytes().to_vec(), - size_in_bytes_bytes - ), - sync_db_entry!(inode_to_db(entry.metadata.inode), inode), - { - let v = entry.metadata.created_at.into(); - sync_db_entry!(v, date_created) - }, - { - let v = entry.metadata.modified_at.into(); - sync_db_entry!(v, date_modified) - }, - { - let v = Utc::now().into(); - sync_db_entry!(v, date_indexed) - }, - sync_db_entry!(entry.metadata.hidden, hidden), - ] - .into_iter() - .unzip(); - - ( - sync.shared_create( - prisma_sync::file_path::SyncId { - pub_id: sd_utils::uuid_to_bytes(entry.pub_id), - }, - sync_params, - ), - file_path::create_unchecked(pub_id, db_params), - ) - }) - .unzip(); - - let count = sync - .write_ops( - db, - ( - sync_stuff.into_iter().flatten().collect(), - db.file_path().create_many(paths).skip_duplicates(), - ), - ) - .await?; - - trace!("Inserted {count} records"); - - Ok(count) -} - -async fn execute_indexer_update_step( - update_step: &OldIndexerJobUpdateStep, - Library { sync, db, .. }: &Library, -) -> Result { - let (sync_stuff, paths_to_update): (Vec<_>, Vec<_>) = update_step - .to_update - .iter() - .map(|entry| async move { - let IsolatedFilePathDataParts { is_dir, .. } = &entry.iso_file_path.to_parts(); - - let pub_id = sd_utils::uuid_to_bytes(entry.pub_id); - - let should_unlink_object = if let Some(object_id) = entry.maybe_object_id { - db.file_path() - .count(vec![file_path::object_id::equals(Some(object_id))]) - .exec() - .await? > 1 - } else { - false - }; - - use file_path::*; - - let (sync_params, db_params): (Vec<_>, Vec<_>) = [ - // As this file was updated while Spacedrive was offline, we mark the object_id and cas_id as null - // So this file_path will be updated at file identifier job - should_unlink_object - .then_some(((object_id::NAME, msgpack!(nil)), object::disconnect())), - Some(((cas_id::NAME, msgpack!(nil)), cas_id::set(None))), - Some(sync_db_entry!(*is_dir, is_dir)), - Some(sync_db_entry!( - entry.metadata.size_in_bytes.to_be_bytes().to_vec(), - size_in_bytes_bytes - )), - Some(sync_db_entry!(inode_to_db(entry.metadata.inode), inode)), - Some({ - let v = entry.metadata.created_at.into(); - sync_db_entry!(v, date_created) - }), - Some({ - let v = entry.metadata.modified_at.into(); - sync_db_entry!(v, date_modified) - }), - Some(sync_db_entry!(entry.metadata.hidden, hidden)), - ] - .into_iter() - .flatten() - .unzip(); - - Ok::<_, IndexerError>(( - sync_params - .into_iter() - .map(|(field, value)| { - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: pub_id.clone(), - }, - field, - value, - ) - }) - .collect::>(), - db.file_path() - .update(file_path::pub_id::equals(pub_id), db_params) - .select(file_path::select!({ id })), - )) - }) - .collect::>() - .try_join() - .await? 
- .into_iter() - .unzip(); - - let updated = sync - .write_ops( - db, - (sync_stuff.into_iter().flatten().collect(), paths_to_update), - ) - .await?; - - trace!("Updated {updated:?} records"); - - Ok(updated.len() as i64) -} - -fn iso_file_path_factory( - location_id: location::id::Type, - location_path: &Path, -) -> impl Fn(&Path, bool) -> Result, IndexerError> + '_ { - move |path, is_dir| { - IsolatedFilePathData::new(location_id, location_path, path, is_dir).map_err(Into::into) - } -} - -async fn remove_non_existing_file_paths( - to_remove: impl IntoIterator, - db: &PrismaClient, - sync: &sd_core_sync::Manager, -) -> Result { - let (sync_params, db_params): (Vec<_>, Vec<_>) = to_remove - .into_iter() - .map(|d| { - ( - sync.shared_delete(prisma_sync::file_path::SyncId { pub_id: d.pub_id }), - d.id, - ) - }) - .unzip(); - - sync.write_ops( - db, - ( - sync_params, - db.file_path() - .delete_many(vec![file_path::id::in_vec(db_params)]), - ), - ) - .await?; - - Ok(0) -} - -// TODO: Change this macro to a fn when we're able to return -// `impl Fn(Vec) -> impl Future, IndexerError>>` -// Maybe when TAITs arrive -#[macro_export] -macro_rules! file_paths_db_fetcher_fn { - ($db:expr) => {{ - |found_paths| async { - // Each found path is a AND with 4 terms, and SQLite has a expression tree limit of 1000 terms - // so we will use chunks of 200 just to be safe - - // FIXME: Can't pass this chunks variable direct to _batch because of lifetime issues - let chunks = found_paths - .into_iter() - .chunks(200) - .into_iter() - .map(|founds| { - $db.file_path() - .find_many(vec![::prisma_client_rust::operator::or( - founds.collect::>(), - )]) - .select(::sd_core_prisma_helpers::file_path_walker::select()) - }) - .collect::>(); - - $db._batch(chunks) - .await - .map(|fetched| fetched.into_iter().flatten().collect::>()) - .map_err(Into::into) - } - }}; -} - -// TODO: Change this macro to a fn when we're able to return -// `impl Fn(&Path, Vec) -> impl Future, IndexerError>>` -// Maybe when TAITs arrive -// FIXME: (fogodev) I was receiving this error here https://github.com/rust-lang/rust/issues/74497 -#[macro_export] -macro_rules! 
to_remove_db_fetcher_fn { - ($location_id:expr, $db:expr) => {{ - |parent_iso_file_path, unique_location_id_materialized_path_name_extension_params| async { - let location_id: ::sd_prisma::prisma::location::id::Type = $location_id; - let db: &::sd_prisma::prisma::PrismaClient = $db; - let parent_iso_file_path: ::sd_core_file_path_helper::IsolatedFilePathData< - 'static, - > = parent_iso_file_path; - let unique_location_id_materialized_path_name_extension_params: ::std::vec::Vec< - ::sd_prisma::prisma::file_path::WhereParam, - > = unique_location_id_materialized_path_name_extension_params; - - // FIXME: Can't pass this chunks variable direct to _batch because of lifetime issues - let chunks = unique_location_id_materialized_path_name_extension_params - .into_iter() - .chunks(200) - .into_iter() - .map(|unique_params| { - db.file_path() - .find_many(vec![::prisma_client_rust::operator::or( - unique_params.collect(), - )]) - .select(::sd_prisma::prisma::file_path::select!({ id })) - }) - .collect::<::std::vec::Vec<_>>(); - - let founds_ids = db._batch(chunks).await.map(|founds_chunk| { - founds_chunk - .into_iter() - .map(|file_paths| file_paths.into_iter().map(|file_path| file_path.id)) - .flatten() - .collect::<::std::collections::HashSet<_>>() - })?; - - // NOTE: This batch size can be increased if we wish to trade memory for more performance - const BATCH_SIZE: i64 = 1000; - - let mut to_remove = vec![]; - let mut cursor = 1; - - loop { - let found = $db.file_path() - .find_many(vec![ - ::sd_prisma::prisma::file_path::location_id::equals(Some(location_id)), - ::sd_prisma::prisma::file_path::materialized_path::equals(Some( - parent_iso_file_path - .materialized_path_for_children() - .expect("the received isolated file path must be from a directory"), - )), - ]) - .order_by(::sd_prisma::prisma::file_path::id::order(::sd_prisma::prisma::SortOrder::Asc)) - .take(BATCH_SIZE) - .cursor(::sd_prisma::prisma::file_path::id::equals(cursor)) - .select(::sd_prisma::prisma::file_path::select!({ id pub_id cas_id })) - .exec() - .await?; - - let should_stop = (found.len() as i64) < BATCH_SIZE; - - if let Some(last) = found.last() { - cursor = last.id; - } else { - break; - } - - to_remove.extend( - found - .into_iter() - .filter(|file_path| !founds_ids.contains(&file_path.id)) - .map(|file_path| ::sd_core_prisma_helpers::file_path_pub_and_cas_ids::Data { - id: file_path.id, - pub_id: file_path.pub_id, - cas_id: file_path.cas_id, - }), - ); - - if should_stop { - break; - } - } - - Ok(to_remove) - } - }}; -} - -pub async fn reverse_update_directories_sizes( - base_path: impl AsRef, - location_id: location::id::Type, - location_path: impl AsRef, - library: &Library, -) -> Result<(), FilePathError> { - let base_path = base_path.as_ref(); - let location_path = location_path.as_ref(); - - let Library { sync, db, .. } = library; - - let ancestors = base_path - .ancestors() - .take_while(|&ancestor| ancestor != location_path) - .map(|ancestor| IsolatedFilePathData::new(location_id, location_path, ancestor, true)) - .collect::, _>>()?; - - let chunked_queries = ancestors - .iter() - .chunks(200) - .into_iter() - .map(|ancestors_iso_file_paths_chunk| { - db.file_path() - .find_many(vec![or(ancestors_iso_file_paths_chunk - .into_iter() - .map(file_path::WhereParam::from) - .collect::>())]) - .select(file_path::select!({ pub_id materialized_path name })) - }) - .collect::>(); - - let mut pub_id_by_ancestor_materialized_path = db - ._batch(chunked_queries) - .await? 
- .into_iter() - .flatten() - .filter_map( - |file_path| match (file_path.materialized_path, file_path.name) { - (Some(materialized_path), Some(name)) => { - Some((format!("{materialized_path}{name}/"), (file_path.pub_id, 0))) - } - _ => { - warn!( - "Found a file_path missing its materialized_path or name: ", - from_bytes_to_uuid(&file_path.pub_id) - ); - None - } - }, - ) - .collect::>(); - - db.file_path() - .find_many(vec![ - file_path::location_id::equals(Some(location_id)), - file_path::materialized_path::in_vec( - ancestors - .iter() - .map(|ancestor_iso_file_path| { - ancestor_iso_file_path - .materialized_path_for_children() - .expect("each ancestor is a directory") - }) - .collect(), - ), - ]) - .select(file_path::select!({ materialized_path size_in_bytes_bytes })) - .exec() - .await? - .into_iter() - .for_each(|file_path| { - if let Some(materialized_path) = file_path.materialized_path { - if let Some((_, size)) = - pub_id_by_ancestor_materialized_path.get_mut(&materialized_path) - { - *size += file_path - .size_in_bytes_bytes - .map(|size_in_bytes_bytes| { - u64::from_be_bytes([ - size_in_bytes_bytes[0], - size_in_bytes_bytes[1], - size_in_bytes_bytes[2], - size_in_bytes_bytes[3], - size_in_bytes_bytes[4], - size_in_bytes_bytes[5], - size_in_bytes_bytes[6], - size_in_bytes_bytes[7], - ]) - }) - .unwrap_or_else(|| { - warn!("Got a directory missing its size in bytes"); - 0 - }); - } - } else { - warn!("Corrupt database possessing a file_path entry without materialized_path"); - } - }); - - let to_sync_and_update = ancestors - .into_iter() - .filter_map(|ancestor_iso_file_path| { - if let Some((pub_id, size)) = pub_id_by_ancestor_materialized_path.remove( - &ancestor_iso_file_path - .materialized_path_for_children() - .expect("each ancestor is a directory"), - ) { - let size_bytes = size.to_be_bytes().to_vec(); - - Some(( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: pub_id.clone(), - }, - file_path::size_in_bytes_bytes::NAME, - msgpack!(size_bytes.clone()), - ), - db.file_path().update( - file_path::pub_id::equals(pub_id), - vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], - ), - )) - } else { - warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption"); - None - } - }) - .unzip::<_, _, Vec<_>, Vec<_>>(); - - sync.write_ops(db, to_sync_and_update).await?; - - Ok(()) -} diff --git a/core/src/location/indexer/old_indexer_job.rs b/core/src/location/indexer/old_indexer_job.rs deleted file mode 100644 index b86d565ae569..000000000000 --- a/core/src/location/indexer/old_indexer_job.rs +++ /dev/null @@ -1,660 +0,0 @@ -// use crate::{ -// file_paths_db_fetcher_fn, invalidate_query, -// library::Library, -// location::{location_with_indexer_rules, update_location_size, ScanState}, -// old_job::{ -// CurrentStep, JobError, JobInitOutput, JobReportUpdate, JobResult, JobRunMetadata, -// JobStepOutput, StatefulJob, WorkerContext, -// }, -// to_remove_db_fetcher_fn, -// }; - -// use sd_core_file_path_helper::{ -// ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, -// IsolatedFilePathData, -// }; -// use sd_core_indexer_rules::IndexerRule; - -// use sd_prisma::{ -// prisma::{file_path, location}, -// prisma_sync, -// }; -// use sd_sync::*; -// use sd_utils::{db::maybe_missing, from_bytes_to_uuid, msgpack}; - -// use std::{ -// collections::HashMap, -// hash::{Hash, Hasher}, -// path::{Path, PathBuf}, -// sync::Arc, -// time::Duration, -// }; - -// use itertools::Itertools; -// use 
prisma_client_rust::operator::or; -// use serde::{Deserialize, Serialize}; -// use serde_json::json; -// use tokio::time::Instant; -// use tracing::{debug, info, warn}; - -// use super::{ -// execute_indexer_save_step, execute_indexer_update_step, iso_file_path_factory, -// old_walk::{keep_walking, walk, ToWalkEntry, WalkResult}, -// remove_non_existing_file_paths, reverse_update_directories_sizes, IndexerError, -// OldIndexerJobSaveStep, OldIndexerJobUpdateStep, -// }; - -// /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. -// const BATCH_SIZE: usize = 1000; - -// /// `IndexerJobInit` receives a `location::Data` object to be indexed -// /// and possibly a `sub_path` to be indexed. The `sub_path` is used when -// /// we want do index just a part of a location. -// #[derive(Serialize, Deserialize, Debug)] -// pub struct OldIndexerJobInit { -// pub location: location_with_indexer_rules::Data, -// pub sub_path: Option, -// } - -// impl Hash for OldIndexerJobInit { -// fn hash(&self, state: &mut H) { -// self.location.id.hash(state); -// if let Some(ref sub_path) = self.sub_path { -// sub_path.hash(state); -// } -// } -// } - -// /// `IndexerJobData` contains the state of the indexer job, which includes a `location_path` that -// /// is cached and casted on `PathBuf` from `local_path` column in the `location` table. It also -// /// contains some metadata for logging purposes. -// #[derive(Serialize, Deserialize, Debug)] -// pub struct OldIndexerJobData { -// location_path: PathBuf, -// indexed_path: PathBuf, -// indexer_rules: Vec, -// } - -// #[derive(Serialize, Deserialize, Default, Debug)] -// pub struct OldIndexerJobRunMetadata { -// db_write_time: Duration, -// scan_read_time: Duration, -// total_paths: u64, -// total_updated_paths: u64, -// total_save_steps: u64, -// total_update_steps: u64, -// indexed_count: u64, -// updated_count: u64, -// removed_count: u64, -// paths_and_sizes: HashMap, -// } - -// impl JobRunMetadata for OldIndexerJobRunMetadata { -// fn update(&mut self, new_data: Self) { -// self.db_write_time += new_data.db_write_time; -// self.scan_read_time += new_data.scan_read_time; -// self.total_paths += new_data.total_paths; -// self.total_updated_paths += new_data.total_updated_paths; -// self.total_save_steps += new_data.total_save_steps; -// self.total_update_steps += new_data.total_update_steps; -// self.indexed_count += new_data.indexed_count; -// self.removed_count += new_data.removed_count; - -// for (path, size) in new_data.paths_and_sizes { -// *self.paths_and_sizes.entry(path).or_default() += size; -// } -// } -// } - -// #[derive(Clone)] -// pub enum ScanProgress { -// ChunkCount(usize), -// SavedChunks(usize), -// UpdatedChunks(usize), -// Message(String), -// } - -// impl OldIndexerJobData { -// fn on_scan_progress(ctx: &WorkerContext, progress: Vec) { -// ctx.progress( -// progress -// .into_iter() -// .map(|p| match p { -// ScanProgress::ChunkCount(c) => JobReportUpdate::TaskCount(c), -// ScanProgress::SavedChunks(p) | ScanProgress::UpdatedChunks(p) => { -// JobReportUpdate::CompletedTaskCount(p) -// } -// ScanProgress::Message(m) => JobReportUpdate::Message(m), -// }) -// .collect(), -// ) -// } -// } - -// /// `IndexerJobStepInput` defines the action that should be executed in the current step -// #[derive(Serialize, Deserialize, Debug)] -// pub enum OldIndexerJobStepInput { -// Save(OldIndexerJobSaveStep), -// Walk(ToWalkEntry), -// Update(OldIndexerJobUpdateStep), -// } - -// /// A 
`IndexerJob` is a stateful job that walks a directory and indexes all files. -// /// First it walks the directory and generates a list of files to index, chunked into -// /// batches of [`BATCH_SIZE`]. Then for each chunk it write the file metadata to the database. -// #[async_trait::async_trait] -// impl StatefulJob for OldIndexerJobInit { -// type Data = OldIndexerJobData; -// type Step = OldIndexerJobStepInput; -// type RunMetadata = OldIndexerJobRunMetadata; - -// const NAME: &'static str = "indexer"; -// const IS_BATCHED: bool = true; - -// fn target_location(&self) -> location::id::Type { -// self.location.id -// } - -// /// Creates a vector of valid path buffers from a directory, chunked into batches of `BATCH_SIZE`. -// async fn init( -// &self, -// ctx: &WorkerContext, -// data: &mut Option, -// ) -> Result, JobError> { -// let init = self; -// let location_id = init.location.id; -// let location_path = maybe_missing(&init.location.path, "location.path").map(Path::new)?; - -// let db = Arc::clone(&ctx.library.db); -// let sync = &ctx.library.sync; - -// let indexer_rules = init -// .location -// .indexer_rules -// .iter() -// .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) -// .collect::, _>>() -// .map_err(IndexerError::from)?; - -// let to_walk_path = match &init.sub_path { -// Some(sub_path) if sub_path != Path::new("") => { -// let full_path = ensure_sub_path_is_in_location(location_path, sub_path) -// .await -// .map_err(IndexerError::from)?; -// ensure_sub_path_is_directory(location_path, sub_path) -// .await -// .map_err(IndexerError::from)?; - -// ensure_file_path_exists( -// sub_path, -// &IsolatedFilePathData::new(location_id, location_path, &full_path, true) -// .map_err(IndexerError::from)?, -// &db, -// IndexerError::SubPathNotFound, -// ) -// .await?; - -// full_path -// } -// _ => location_path.to_path_buf(), -// }; - -// let scan_start = Instant::now(); -// let WalkResult { -// walked, -// to_update, -// to_walk, -// to_remove, -// errors, -// paths_and_sizes, -// } = walk( -// &location_path, -// &to_walk_path, -// &indexer_rules, -// update_notifier_fn(ctx), -// file_paths_db_fetcher_fn!(&db), -// to_remove_db_fetcher_fn!(location_id, &db), -// iso_file_path_factory(location_id, location_path), -// 50_000, -// ) -// .await?; -// let scan_read_time = scan_start.elapsed(); -// let to_remove = to_remove.collect::>(); - -// debug!( -// "Walker at indexer job found {} file_paths to be removed", -// to_remove.len() -// ); - -// ctx.node -// .thumbnailer -// .remove_indexed_cas_ids( -// to_remove -// .iter() -// .filter_map(|file_path| file_path.cas_id.clone()) -// .collect::>(), -// ctx.library.id, -// ) -// .await; - -// let db_delete_start = Instant::now(); -// // TODO pass these uuids to sync system -// let removed_count = remove_non_existing_file_paths(to_remove, &db, sync).await?; -// let db_delete_time = db_delete_start.elapsed(); - -// let total_new_paths = &mut 0; -// let total_updated_paths = &mut 0; -// let to_walk_count = to_walk.len(); -// let to_save_chunks = &mut 0; -// let to_update_chunks = &mut 0; - -// let steps = walked -// .chunks(BATCH_SIZE) -// .into_iter() -// .enumerate() -// .map(|(i, chunk)| { -// let chunk_steps = chunk.collect::>(); - -// *total_new_paths += chunk_steps.len() as u64; -// *to_save_chunks += 1; - -// OldIndexerJobStepInput::Save(OldIndexerJobSaveStep { -// chunk_idx: i, -// walked: chunk_steps, -// }) -// }) -// .chain( -// to_update -// .chunks(BATCH_SIZE) -// .into_iter() -// .enumerate() -// .map(|(i, chunk)| { -// 
let chunk_updates = chunk.collect::>(); - -// *total_updated_paths += chunk_updates.len() as u64; -// *to_update_chunks += 1; - -// OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep { -// chunk_idx: i, -// to_update: chunk_updates, -// }) -// }), -// ) -// .chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk)) -// .collect::>(); - -// debug!("Walker at indexer job found {total_updated_paths} file_paths to be updated"); - -// OldIndexerJobData::on_scan_progress( -// ctx, -// vec![ -// ScanProgress::ChunkCount(*to_save_chunks + *to_update_chunks), -// ScanProgress::Message(format!( -// "Starting saving {total_new_paths} files or directories, \ -// {total_updated_paths} files or directories to update, \ -// there still {to_walk_count} directories to index", -// )), -// ], -// ); - -// *data = Some(OldIndexerJobData { -// location_path: location_path.to_path_buf(), -// indexed_path: to_walk_path, -// indexer_rules, -// }); - -// Ok(( -// OldIndexerJobRunMetadata { -// db_write_time: db_delete_time, -// scan_read_time, -// total_paths: *total_new_paths, -// total_updated_paths: *total_updated_paths, -// indexed_count: 0, -// updated_count: 0, -// removed_count, -// total_save_steps: *to_save_chunks as u64, -// total_update_steps: *to_update_chunks as u64, -// paths_and_sizes, -// }, -// steps, -// errors -// .into_iter() -// .map(|e| format!("{e}")) -// .collect::>() -// .into(), -// ) -// .into()) -// } - -// /// Process each chunk of entries in the indexer job, writing to the `file_path` table -// async fn execute_step( -// &self, -// ctx: &WorkerContext, -// CurrentStep { step, .. }: CurrentStep<'_, Self::Step>, -// data: &Self::Data, -// run_metadata: &Self::RunMetadata, -// ) -> Result, JobError> { -// let init = self; -// let mut new_metadata = Self::RunMetadata::default(); -// match step { -// OldIndexerJobStepInput::Save(step) => { -// let start_time = Instant::now(); - -// OldIndexerJobData::on_scan_progress( -// ctx, -// vec![ -// ScanProgress::SavedChunks(step.chunk_idx + 1), -// ScanProgress::Message(format!( -// "Writing chunk {} of {} to database", -// step.chunk_idx, run_metadata.total_save_steps -// )), -// ], -// ); - -// let count = execute_indexer_save_step(&init.location, step, &ctx.library).await?; - -// new_metadata.indexed_count = count as u64; -// new_metadata.db_write_time = start_time.elapsed(); - -// Ok(new_metadata.into()) -// } -// OldIndexerJobStepInput::Update(to_update) => { -// let start_time = Instant::now(); -// OldIndexerJobData::on_scan_progress( -// ctx, -// vec![ -// ScanProgress::UpdatedChunks(to_update.chunk_idx + 1), -// ScanProgress::Message(format!( -// "Updating chunk {} of {} to database", -// to_update.chunk_idx, run_metadata.total_save_steps -// )), -// ], -// ); - -// let count = execute_indexer_update_step(to_update, &ctx.library).await?; - -// new_metadata.updated_count = count as u64; -// new_metadata.db_write_time = start_time.elapsed(); - -// Ok(new_metadata.into()) -// } - -// OldIndexerJobStepInput::Walk(to_walk_entry) => { -// let location_id = init.location.id; -// let location_path = -// maybe_missing(&init.location.path, "location.path").map(Path::new)?; - -// let db = Arc::clone(&ctx.library.db); -// let sync = &ctx.library.sync; - -// let scan_start = Instant::now(); - -// let WalkResult { -// walked, -// to_update, -// to_walk, -// to_remove, -// errors, -// paths_and_sizes, -// } = keep_walking( -// location_path, -// to_walk_entry, -// &data.indexer_rules, -// update_notifier_fn(ctx), -// 
file_paths_db_fetcher_fn!(&db), -// to_remove_db_fetcher_fn!(location_id, &db), -// iso_file_path_factory(location_id, location_path), -// ) -// .await?; - -// new_metadata.paths_and_sizes = paths_and_sizes; - -// new_metadata.scan_read_time = scan_start.elapsed(); - -// let db_delete_time = Instant::now(); -// // TODO pass these uuids to sync system -// new_metadata.removed_count = -// remove_non_existing_file_paths(to_remove, &db, sync).await?; -// new_metadata.db_write_time = db_delete_time.elapsed(); - -// let to_walk_count = to_walk.len(); - -// let more_steps = walked -// .chunks(BATCH_SIZE) -// .into_iter() -// .enumerate() -// .map(|(i, chunk)| { -// let chunk_steps = chunk.collect::>(); -// new_metadata.total_paths += chunk_steps.len() as u64; -// new_metadata.total_save_steps += 1; - -// OldIndexerJobStepInput::Save(OldIndexerJobSaveStep { -// chunk_idx: i, -// walked: chunk_steps, -// }) -// }) -// .chain(to_update.chunks(BATCH_SIZE).into_iter().enumerate().map( -// |(i, chunk)| { -// let chunk_updates = chunk.collect::>(); -// new_metadata.total_updated_paths += chunk_updates.len() as u64; -// new_metadata.total_update_steps += 1; - -// OldIndexerJobStepInput::Update(OldIndexerJobUpdateStep { -// chunk_idx: i, -// to_update: chunk_updates, -// }) -// }, -// )) -// .chain(to_walk.into_iter().map(OldIndexerJobStepInput::Walk)) -// .collect::>(); - -// OldIndexerJobData::on_scan_progress( -// ctx, -// vec![ -// ScanProgress::ChunkCount(more_steps.len() - to_walk_count), -// ScanProgress::Message(format!( -// "Scanned {} more files or directories; \ -// {} more directories to scan and {} more entries to update", -// new_metadata.total_paths, -// to_walk_count, -// new_metadata.total_updated_paths -// )), -// ], -// ); - -// Ok(( -// more_steps, -// new_metadata, -// errors -// .into_iter() -// .map(|e| format!("{e}")) -// .collect::>() -// .into(), -// ) -// .into()) -// } -// } -// } - -// async fn finalize( -// &self, -// ctx: &WorkerContext, -// data: &Option, -// run_metadata: &Self::RunMetadata, -// ) -> JobResult { -// let init = self; -// let indexed_path_str = data -// .as_ref() -// .map(|data| Ok(data.indexed_path.to_string_lossy().to_string())) -// .unwrap_or_else(|| maybe_missing(&init.location.path, "location.path").cloned())?; - -// info!( -// "Scan of {indexed_path_str} completed in {:?}. {} new files found, \ -// indexed {} files in db, updated {} entries. 
db write completed in {:?}", -// run_metadata.scan_read_time, -// run_metadata.total_paths, -// run_metadata.indexed_count, -// run_metadata.total_updated_paths, -// run_metadata.db_write_time, -// ); - -// if run_metadata.indexed_count > 0 || run_metadata.removed_count > 0 { -// invalidate_query!(ctx.library, "search.paths"); -// } - -// if run_metadata.total_updated_paths > 0 { -// // Invoking orphan remover here as we probably have some orphans objects due to updates -// // ctx.library.orphan_remover.invoke().await; -// } - -// if run_metadata.indexed_count > 0 -// || run_metadata.removed_count > 0 -// || run_metadata.updated_count > 0 -// { -// if let Some(data) = data { -// update_directories_sizes( -// &run_metadata.paths_and_sizes, -// init.location.id, -// &data.indexed_path, -// &ctx.library, -// ) -// .await?; - -// if data.indexed_path != data.location_path { -// reverse_update_directories_sizes( -// &data.indexed_path, -// init.location.id, -// &data.location_path, -// &ctx.library, -// ) -// .await -// .map_err(IndexerError::from)?; -// } - -// update_location_size(init.location.id, &ctx.library) -// .await -// .map_err(IndexerError::from)?; - -// ctx.library -// .db -// .location() -// .update( -// location::id::equals(init.location.id), -// vec![location::scan_state::set(ScanState::Indexed as i32)], -// ) -// .exec() -// .await -// .map_err(IndexerError::from)?; -// } -// } - -// // FIXME(fogodev): This is currently a workaround to don't save paths and sizes in the -// // metadata after a job is completed, as it's pretty heavy. A proper fix isn't needed -// // right now as I already changed it in the new indexer job. And this old one -// // will be removed eventually. -// let run_metadata = Self::RunMetadata { -// db_write_time: run_metadata.db_write_time, -// scan_read_time: run_metadata.scan_read_time, -// total_paths: run_metadata.total_paths, -// total_updated_paths: run_metadata.total_updated_paths, -// total_save_steps: run_metadata.total_save_steps, -// total_update_steps: run_metadata.total_update_steps, -// indexed_count: run_metadata.indexed_count, -// updated_count: run_metadata.updated_count, -// removed_count: run_metadata.removed_count, -// paths_and_sizes: HashMap::new(), -// }; - -// Ok(Some(json!({"init: ": init, "run_metadata": run_metadata}))) -// } -// } - -// fn update_notifier_fn(ctx: &WorkerContext) -> impl FnMut(&Path, usize) + '_ { -// move |path, total_entries| { -// OldIndexerJobData::on_scan_progress( -// ctx, -// vec![ScanProgress::Message(format!( -// "{total_entries} entries found at {}", -// path.display() -// ))], -// ); -// } -// } - -// async fn update_directories_sizes( -// paths_and_sizes: &HashMap, -// location_id: location::id::Type, -// location_path: impl AsRef, -// library: &Library, -// ) -> Result<(), IndexerError> { -// let location_path = location_path.as_ref(); - -// let Library { db, sync, .. } = library; - -// let chunked_queries = paths_and_sizes -// .keys() -// .chunks(200) -// .into_iter() -// .map(|paths_chunk| { -// paths_chunk -// .into_iter() -// .map(|path| { -// IsolatedFilePathData::new(location_id, location_path, path, true) -// .map(file_path::WhereParam::from) -// }) -// .collect::, _>>() -// .map(|params| { -// db.file_path() -// .find_many(vec![or(params)]) -// .select(file_path::select!({ pub_id materialized_path name })) -// }) -// }) -// .collect::, _>>()?; - -// let to_sync_and_update = db -// ._batch(chunked_queries) -// .await? 
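// Aside (illustration only): both removed indexer jobs split their walked entries
// into fixed-size batches so each save step writes at most `BATCH_SIZE` records per
// database round trip. The pattern, with `u64` standing in for the real walked-entry
// type and itertools' `chunks` doing the splitting:
use itertools::Itertools;

const BATCH_SIZE: usize = 1000; // same value the removed jobs used

fn into_save_steps(walked: Vec<u64>) -> Vec<(usize, Vec<u64>)> {
	let steps = walked
		.into_iter()
		.chunks(BATCH_SIZE)
		.into_iter()
		.enumerate()
		.map(|(chunk_idx, chunk)| (chunk_idx, chunk.collect::<Vec<_>>()))
		.collect::<Vec<_>>();

	steps
}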
-// .into_iter() -// .flatten() -// .filter_map( -// |file_path| match (file_path.materialized_path, file_path.name) { -// (Some(materialized_path), Some(name)) => { -// let mut directory_full_path = location_path.join(&materialized_path[1..]); -// directory_full_path.push(name); - -// if let Some(size) = paths_and_sizes.get(&directory_full_path) { -// let size_bytes = size.to_be_bytes().to_vec(); - -// Some(( -// sync.shared_update( -// prisma_sync::file_path::SyncId { -// pub_id: file_path.pub_id.clone(), -// }, -// file_path::size_in_bytes_bytes::NAME, -// msgpack!(size_bytes.clone()), -// ), -// db.file_path().update( -// file_path::pub_id::equals(file_path.pub_id), -// vec![file_path::size_in_bytes_bytes::set(Some(size_bytes))], -// ), -// )) -// } else { -// warn!("Found a file_path without ancestor in the database, possible corruption"); -// None -// } -// } -// _ => { -// warn!( -// "Found a file_path missing its materialized_path or name: ", -// from_bytes_to_uuid(&file_path.pub_id) -// ); -// None -// } -// }, -// ) -// .unzip::<_, _, Vec<_>, Vec<_>>(); - -// sync.write_ops(db, to_sync_and_update).await?; - -// Ok(()) -// } diff --git a/core/src/location/indexer/old_shallow.rs b/core/src/location/indexer/old_shallow.rs deleted file mode 100644 index 4860f13e9afd..000000000000 --- a/core/src/location/indexer/old_shallow.rs +++ /dev/null @@ -1,197 +0,0 @@ -// use crate::{ -// file_paths_db_fetcher_fn, invalidate_query, -// library::Library, -// location::{ -// indexer::{ -// execute_indexer_update_step, reverse_update_directories_sizes, OldIndexerJobUpdateStep, -// }, -// scan_location_sub_path, update_location_size, -// }, -// old_job::JobError, -// to_remove_db_fetcher_fn, Node, -// }; - -// use sd_core_file_path_helper::{ -// check_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, -// IsolatedFilePathData, -// }; -// use sd_core_indexer_rules::IndexerRule; - -// use sd_utils::db::maybe_missing; - -// use std::{ -// collections::HashSet, -// path::{Path, PathBuf}, -// sync::Arc, -// }; - -// use futures::future::join_all; -// use itertools::Itertools; -// use tracing::{debug, error}; - -// use super::{ -// execute_indexer_save_step, iso_file_path_factory, location_with_indexer_rules, -// old_walk::walk_single_dir, remove_non_existing_file_paths, IndexerError, OldIndexerJobSaveStep, -// }; - -// /// BATCH_SIZE is the number of files to index at each step, writing the chunk of files metadata in the database. 
-// const BATCH_SIZE: usize = 1000; - -// pub async fn old_shallow( -// location: &location_with_indexer_rules::Data, -// sub_path: &PathBuf, -// node: &Arc, -// library: &Arc, -// ) -> Result<(), JobError> { -// let location_id = location.id; -// let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?; - -// let db = library.db.clone(); -// let sync = &library.sync; - -// let indexer_rules = location -// .indexer_rules -// .iter() -// .map(|rule| IndexerRule::try_from(&rule.indexer_rule)) -// .collect::, _>>() -// .map_err(IndexerError::from)?; - -// let (add_root, to_walk_path) = if sub_path != Path::new("") && sub_path != Path::new("/") { -// let full_path = ensure_sub_path_is_in_location(&location_path, &sub_path) -// .await -// .map_err(IndexerError::from)?; -// ensure_sub_path_is_directory(&location_path, &sub_path) -// .await -// .map_err(IndexerError::from)?; - -// ( -// !check_file_path_exists::( -// &IsolatedFilePathData::new(location_id, location_path, &full_path, true) -// .map_err(IndexerError::from)?, -// &db, -// ) -// .await?, -// full_path, -// ) -// } else { -// (false, location_path.to_path_buf()) -// }; - - // let (walked, to_update, to_remove, errors, _s) = { - // walk_single_dir( - // location_path, - // &to_walk_path, - // &indexer_rules, - // file_paths_db_fetcher_fn!(&db), - // to_remove_db_fetcher_fn!(location_id, &db), - // iso_file_path_factory(location_id, location_path), - // add_root, - // ) - // .await? - // }; - -// let to_remove_count = to_remove.len(); - -// node.thumbnailer -// .remove_indexed_cas_ids( -// to_remove -// .iter() -// .filter_map(|file_path| file_path.cas_id.clone()) -// .collect::>(), -// library.id, -// ) -// .await; - -// errors.into_iter().for_each(|e| error!("{e}")); - -// remove_non_existing_file_paths(to_remove, &db, sync).await?; - -// let mut new_directories_to_scan = HashSet::new(); - -// let mut to_create_count = 0; - -// let save_steps = walked -// .chunks(BATCH_SIZE) -// .into_iter() -// .enumerate() -// .map(|(i, chunk)| { -// let walked = chunk.collect::>(); -// to_create_count += walked.len(); - -// walked -// .iter() -// .filter_map(|walked_entry| { -// walked_entry.iso_file_path.materialized_path_for_children() -// }) -// .for_each(|new_dir| { -// new_directories_to_scan.insert(new_dir); -// }); - -// OldIndexerJobSaveStep { -// chunk_idx: i, -// walked, -// } -// }) -// .collect::>(); - -// for step in save_steps { -// execute_indexer_save_step(location, &step, library).await?; -// } - -// for scan in join_all( -// new_directories_to_scan -// .into_iter() -// .map(|sub_path| scan_location_sub_path(node, library, location.clone(), sub_path)), -// ) -// .await -// { -// if let Err(e) = scan { -// error!("{e}"); -// } -// } - -// let mut to_update_count = 0; - -// let update_steps = to_update -// .chunks(BATCH_SIZE) -// .into_iter() -// .enumerate() -// .map(|(i, chunk)| { -// let to_update = chunk.collect::>(); -// to_update_count += to_update.len(); - -// OldIndexerJobUpdateStep { -// chunk_idx: i, -// to_update, -// } -// }) -// .collect::>(); - -// for step in update_steps { -// execute_indexer_update_step(&step, library).await?; -// } - -// debug!( -// "Walker at shallow indexer found: \ -// To create: {to_create_count}; To update: {to_update_count}; To remove: {to_remove_count};" -// ); - -// if to_create_count > 0 || to_update_count > 0 || to_remove_count > 0 { -// if to_walk_path != location_path { -// reverse_update_directories_sizes(to_walk_path, location_id, location_path, library) -// 
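// A minimal sketch of the save-step batching in `old_shallow` above: walked entries are split
// into chunks of BATCH_SIZE and each chunk becomes one save step, which is then written to the
// database one step at a time. `SaveStep` stands in for `OldIndexerJobSaveStep` and the entry
// type is simplified to a plain path string.
struct SaveStep {
    chunk_idx: usize,
    walked: Vec<String>,
}

fn build_save_steps(walked: &[String], batch_size: usize) -> Vec<SaveStep> {
    walked
        .chunks(batch_size) // BATCH_SIZE = 1000 in the code above
        .enumerate()
        .map(|(chunk_idx, chunk)| SaveStep {
            chunk_idx,
            walked: chunk.to_vec(),
        })
        .collect()
}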
.await -// .map_err(IndexerError::from)?; -// } - -// update_location_size(location.id, library) -// .await -// .map_err(IndexerError::from)?; - -// invalidate_query!(library, "search.paths"); -// invalidate_query!(library, "search.objects"); -// } - -// // library.orphan_remover.invoke().await; - -// Ok(()) -// } diff --git a/core/src/location/indexer/old_walk.rs b/core/src/location/indexer/old_walk.rs deleted file mode 100644 index 1cc73dfd52d5..000000000000 --- a/core/src/location/indexer/old_walk.rs +++ /dev/null @@ -1,1120 +0,0 @@ -use sd_core_file_path_helper::{FilePathMetadata, IsolatedFilePathData}; -use sd_core_indexer_rules::{ - seed::{GitIgnoreRules, GITIGNORE}, - IndexerRule, RuleKind, -}; -use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker}; - -use sd_prisma::prisma::file_path; -use sd_utils::{db::inode_from_db, error::FileIOError}; - -use std::{ - collections::{HashMap, HashSet, VecDeque}, - future::Future, - hash::{Hash, Hasher}, - ops::Deref, - path::{Path, PathBuf}, -}; - -use chrono::{DateTime, Duration, FixedOffset}; -use serde::{Deserialize, Serialize}; -use tokio::fs; -use tracing::trace; -use uuid::Uuid; - -use super::IndexerError; - -const TO_WALK_QUEUE_INITIAL_CAPACITY: usize = 32; -const WALKER_PATHS_BUFFER_INITIAL_CAPACITY: usize = 256; -const WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY: usize = 32; - -/// `WalkEntry` represents a single path in the filesystem, for any comparison purposes, we only -/// consider the path itself, not the metadata. -#[derive(Debug, Serialize, Deserialize)] -pub struct WalkedEntry { - pub pub_id: Uuid, - pub maybe_object_id: file_path::object_id::Type, - pub iso_file_path: IsolatedFilePathData<'static>, - pub metadata: FilePathMetadata, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct ToWalkEntry { - path: PathBuf, - parent_dir_accepted_by_its_children: Option, - maybe_parent: Option, -} - -#[derive(Debug)] -struct WalkingEntry { - iso_file_path: IsolatedFilePathData<'static>, - maybe_metadata: Option, -} - -impl From for WalkedEntry { - fn from(walking_entry: WalkingEntry) -> Self { - let WalkingEntry { - iso_file_path, - maybe_metadata, - } = walking_entry; - - Self { - pub_id: Uuid::new_v4(), - maybe_object_id: None, - iso_file_path, - metadata: maybe_metadata - .expect("we always use Some in `the inner_walk_single_dir` function"), - } - } -} - -impl From<(Uuid, file_path::object_id::Type, WalkingEntry)> for WalkedEntry { - fn from( - (pub_id, maybe_object_id, walking_entry): (Uuid, file_path::object_id::Type, WalkingEntry), - ) -> Self { - let WalkingEntry { - iso_file_path, - maybe_metadata, - } = walking_entry; - - Self { - pub_id, - maybe_object_id, - iso_file_path, - metadata: maybe_metadata - .expect("we always use Some in `the inner_walk_single_dir` function"), - } - } -} - -impl PartialEq for WalkingEntry { - fn eq(&self, other: &Self) -> bool { - self.iso_file_path == other.iso_file_path - } -} - -impl Eq for WalkingEntry {} - -impl Hash for WalkingEntry { - fn hash(&self, state: &mut H) { - self.iso_file_path.hash(state); - } -} - -pub struct WalkResult -where - Walked: Iterator, - ToUpdate: Iterator, - ToRemove: Iterator, -{ - pub walked: Walked, - pub to_update: ToUpdate, - pub to_walk: VecDeque, - pub to_remove: ToRemove, - pub errors: Vec, - pub paths_and_sizes: HashMap, -} - -/// This function walks through the filesystem, applying the rules to each entry and then returning -/// a list of accepted entries. 
There are some useful comments in the implementation of this function -/// in case of doubts. -pub(super) async fn walk( - library_root: impl AsRef, - current_dir: impl AsRef, - indexer_rules: &[IndexerRule], - mut update_notifier: impl FnMut(&Path, usize), - file_paths_db_fetcher: impl Fn(Vec) -> FilePathDBFetcherFut, - to_remove_db_fetcher: impl Fn( - IsolatedFilePathData<'static>, - Vec, - ) -> ToRemoveDbFetcherFut, - iso_file_path_factory: impl Fn(&Path, bool) -> Result, IndexerError>, - limit: u64, -) -> Result< - WalkResult< - impl Iterator, - impl Iterator, - impl Iterator, - >, - IndexerError, -> -where - FilePathDBFetcherFut: Future, IndexerError>>, - ToRemoveDbFetcherFut: - Future, IndexerError>>, -{ - let current_dir = current_dir.as_ref(); - - let mut to_walk = VecDeque::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY); - to_walk.push_back(ToWalkEntry { - path: current_dir.to_path_buf(), - parent_dir_accepted_by_its_children: None, - maybe_parent: None, - }); - let mut indexed_paths = HashSet::with_capacity(WALKER_PATHS_BUFFER_INITIAL_CAPACITY); - let mut errors = vec![]; - let mut paths_buffer = HashSet::with_capacity(WALKER_PATHS_BUFFER_INITIAL_CAPACITY); - let mut paths_and_sizes = HashMap::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY); - let mut to_remove = vec![]; - - while let Some(entry) = to_walk.pop_front() { - let last_indexed_count = indexed_paths.len(); - - let (entry_size, current_to_remove) = inner_walk_single_dir( - library_root.as_ref(), - current_dir, - &entry, - indexer_rules, - &to_remove_db_fetcher, - &iso_file_path_factory, - WorkingTable { - indexed_paths: &mut indexed_paths, - paths_buffer: &mut paths_buffer, - maybe_to_walk: Some(&mut to_walk), - errors: &mut errors, - }, - ) - .await; - to_remove.push(current_to_remove); - - update_notifier(&entry.path, indexed_paths.len() - last_indexed_count); - - // Saving the size of current entry - paths_and_sizes.insert(entry.path, entry_size); - - // Adding the size of current entry to its parent - if let Some(parent) = entry.maybe_parent { - *paths_and_sizes.entry(parent).or_default() += entry_size; - } - - if indexed_paths.len() >= limit as usize { - break; - } - } - - let (walked, to_update) = filter_existing_paths(indexed_paths, file_paths_db_fetcher).await?; - - Ok(WalkResult { - walked, - to_update, - to_walk, - to_remove: to_remove.into_iter().flatten(), - errors, - paths_and_sizes, - }) -} - -pub(super) async fn keep_walking( - location_path: impl AsRef, - to_walk_entry: &ToWalkEntry, - indexer_rules: &[IndexerRule], - mut update_notifier: impl FnMut(&Path, usize), - file_paths_db_fetcher: impl Fn(Vec) -> FilePathDBFetcherFut, - to_remove_db_fetcher: impl Fn( - IsolatedFilePathData<'static>, - Vec, - ) -> ToRemoveDbFetcherFut, - iso_file_path_factory: impl Fn(&Path, bool) -> Result, IndexerError>, -) -> Result< - WalkResult< - impl Iterator, - impl Iterator, - impl Iterator, - >, - IndexerError, -> -where - FilePathDBFetcherFut: Future, IndexerError>>, - ToRemoveDbFetcherFut: - Future, IndexerError>>, -{ - let mut to_keep_walking = VecDeque::with_capacity(TO_WALK_QUEUE_INITIAL_CAPACITY); - let mut indexed_paths = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); - let mut paths_buffer = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); - let mut errors = vec![]; - - let (to_walk_entry_size, to_remove) = inner_walk_single_dir( - location_path, - to_walk_entry.path.clone(), - to_walk_entry, - indexer_rules, - &to_remove_db_fetcher, - &iso_file_path_factory, - 
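// A minimal sketch of the per-directory size bookkeeping inside the `walk` loop above: once a
// directory has been walked, the size accumulated while walking it is recorded under its own
// path and also added to its parent's running total, so `paths_and_sizes` ends up with one
// aggregated size per visited directory. Plain std types stand in for the real walker state.
use std::{collections::HashMap, path::PathBuf};

fn record_dir_size(
    paths_and_sizes: &mut HashMap<PathBuf, u64>,
    dir: PathBuf,
    maybe_parent: Option<PathBuf>,
    dir_size: u64,
) {
    // Saving the size of the current entry.
    paths_and_sizes.insert(dir, dir_size);

    // Adding the size of the current entry to its parent.
    if let Some(parent) = maybe_parent {
        *paths_and_sizes.entry(parent).or_default() += dir_size;
    }
}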
WorkingTable { - indexed_paths: &mut indexed_paths, - paths_buffer: &mut paths_buffer, - maybe_to_walk: Some(&mut to_keep_walking), - errors: &mut errors, - }, - ) - .await; - - update_notifier(&to_walk_entry.path, indexed_paths.len()); - - let (walked, to_update) = filter_existing_paths(indexed_paths, file_paths_db_fetcher).await?; - - Ok(WalkResult { - walked, - to_update, - to_walk: to_keep_walking, - to_remove: to_remove.into_iter(), - errors, - paths_and_sizes: [ - Some((to_walk_entry.path.clone(), to_walk_entry_size)), - to_walk_entry - .maybe_parent - .as_ref() - .map(|parent_path| (parent_path.clone(), to_walk_entry_size)), - ] - .into_iter() - .flatten() - .collect(), - }) -} - -pub(super) async fn walk_single_dir( - location_path: impl AsRef, - current_dir: impl AsRef, - indexer_rules: &[IndexerRule], - file_paths_db_fetcher: impl Fn(Vec) -> FilePathDBFetcherFut, - to_remove_db_fetcher: impl Fn( - IsolatedFilePathData<'static>, - Vec, - ) -> ToRemoveDbFetcherFut, - iso_file_path_factory: impl Fn(&Path, bool) -> Result, IndexerError>, - add_root: bool, -) -> Result< - ( - impl Iterator, - impl Iterator, - Vec, - Vec, - u64, - ), - IndexerError, -> -where - FilePathDBFetcherFut: Future, IndexerError>>, - ToRemoveDbFetcherFut: - Future, IndexerError>>, -{ - let current_directory = current_dir.as_ref(); - - let mut indexed_paths = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); - - if add_root { - let metadata = fs::metadata(current_directory) - .await - .map_err(|e| FileIOError::from((current_directory, e)))?; - - indexed_paths.insert(WalkingEntry { - iso_file_path: iso_file_path_factory(current_directory, true)?, - maybe_metadata: Some(FilePathMetadata::from_path(current_directory, &metadata)?), - }); - } - - let mut paths_buffer = HashSet::with_capacity(WALK_SINGLE_DIR_PATHS_BUFFER_INITIAL_CAPACITY); - let mut errors = vec![]; - - let (root_size, to_remove) = inner_walk_single_dir( - location_path, - current_directory, - &ToWalkEntry { - path: current_directory.to_path_buf(), - parent_dir_accepted_by_its_children: None, - maybe_parent: None, - }, - indexer_rules, - &to_remove_db_fetcher, - &iso_file_path_factory, - WorkingTable { - indexed_paths: &mut indexed_paths, - paths_buffer: &mut paths_buffer, - maybe_to_walk: None, - errors: &mut errors, - }, - ) - .await; - - let (walked, to_update) = filter_existing_paths(indexed_paths, file_paths_db_fetcher).await?; - - Ok((walked, to_update, to_remove, errors, root_size)) -} - -async fn filter_existing_paths( - indexed_paths: HashSet, - file_paths_db_fetcher: impl Fn(Vec) -> F, -) -> Result< - ( - impl Iterator, - impl Iterator, - ), - IndexerError, -> -where - F: Future, IndexerError>>, -{ - if !indexed_paths.is_empty() { - file_paths_db_fetcher( - indexed_paths - .iter() - .map(|entry| &entry.iso_file_path) - .map(Into::into) - .collect(), - ) - .await - } else { - Ok(vec![]) - } - .map(move |file_paths| { - let isolated_paths_already_in_db = file_paths - .into_iter() - .flat_map(|file_path| { - IsolatedFilePathData::try_from(file_path.clone()) - .map(|iso_file_path| (iso_file_path, file_path)) - }) - .collect::>(); - - let mut to_update = vec![]; - - let to_create = indexed_paths - .into_iter() - .filter_map(|entry| { - if let Some(file_path) = isolated_paths_already_in_db.get(&entry.iso_file_path) { - if let (Some(metadata), Some(inode), Some(date_modified)) = ( - &entry.maybe_metadata, - &file_path.inode, - &file_path.date_modified, - ) { - if ( - inode_from_db(&inode[0..8]) != metadata.inode - // Datetimes 
stored in DB loses a bit of precision, so we need to check against a delta - // instead of using != operator - || DateTime::::from(metadata.modified_at) - *date_modified - > Duration::milliseconds(1) || file_path.hidden.is_none() || metadata.hidden != file_path.hidden.unwrap_or_default() - ) - // We ignore the size of directories because it is not reliable, we need to - // calculate it ourselves later - && !( - entry.iso_file_path.to_parts().is_dir - && metadata.size_in_bytes - != file_path - .size_in_bytes_bytes - .as_ref() - .map(|size_in_bytes_bytes| { - u64::from_be_bytes([ - size_in_bytes_bytes[0], - size_in_bytes_bytes[1], - size_in_bytes_bytes[2], - size_in_bytes_bytes[3], - size_in_bytes_bytes[4], - size_in_bytes_bytes[5], - size_in_bytes_bytes[6], - size_in_bytes_bytes[7], - ]) - }) - .unwrap_or_default() - ) { - to_update.push( - (sd_utils::from_bytes_to_uuid(&file_path.pub_id), file_path.object_id, entry).into(), - ); - } - } - - None - } else { - Some(entry.into()) - } - }) - .collect::>(); - - (to_create.into_iter(), to_update.into_iter()) - }) -} - -struct WorkingTable<'a> { - indexed_paths: &'a mut HashSet, - paths_buffer: &'a mut HashSet, - maybe_to_walk: Option<&'a mut VecDeque>, - errors: &'a mut Vec, -} - -async fn inner_walk_single_dir( - library_root: impl AsRef, - current_dir: impl AsRef, - ToWalkEntry { - path, - parent_dir_accepted_by_its_children, - .. - }: &ToWalkEntry, - indexer_rules: &[IndexerRule], - to_remove_db_fetcher: impl Fn( - IsolatedFilePathData<'static>, - Vec, - ) -> ToRemoveDbFetcherFut, - iso_file_path_factory: &impl Fn(&Path, bool) -> Result, IndexerError>, - WorkingTable { - indexed_paths, - paths_buffer, - mut maybe_to_walk, - errors, - }: WorkingTable<'_>, -) -> (u64, Vec) -where - ToRemoveDbFetcherFut: - Future, IndexerError>>, -{ - let Ok(iso_file_path_to_walk) = iso_file_path_factory(path, true).map_err(|e| errors.push(e)) - else { - return (0, vec![]); - }; - - let Ok(mut read_dir) = fs::read_dir(path) - .await - .map_err(|e| errors.push(FileIOError::from((path.clone(), e)).into())) - else { - return (0, vec![]); - }; - - let mut rules = indexer_rules.to_owned(); - - if rules.iter().any(|rule| GITIGNORE.deref() == rule) { - if let Some(pat) = - GitIgnoreRules::get_rules_if_in_git_repo(library_root.as_ref(), path).await - { - rules.extend(pat.into_iter().map(Into::into)); - } - } - - let current_dir = current_dir.as_ref(); - - // Just to make sure... 
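// A minimal sketch of the "should this already-indexed path be updated?" check above, with plain
// structs standing in for the prisma row and the filesystem metadata: an entry is queued for
// update when its inode differs, its modification time drifted by more than 1ms (DB datetimes
// lose sub-millisecond precision, so a plain `!=` would flag everything), its hidden flag is
// missing, or its hidden flag changed; and, mirroring the original condition, a directory with a
// size mismatch is not queued, since directory sizes are recalculated by the indexer itself.
use chrono::{DateTime, Duration, FixedOffset, Utc};

struct DbRow {
    inode: u64,
    date_modified: DateTime<FixedOffset>,
    hidden: Option<bool>,
    size_in_bytes: u64,
}

struct FsEntry {
    inode: u64,
    modified_at: DateTime<Utc>,
    hidden: bool,
    size_in_bytes: u64,
    is_dir: bool,
}

fn needs_update(db: &DbRow, fs: &FsEntry) -> bool {
    let changed = db.inode != fs.inode
        || DateTime::<FixedOffset>::from(fs.modified_at) - db.date_modified
            > Duration::milliseconds(1)
        || db.hidden.is_none()
        || fs.hidden != db.hidden.unwrap_or_default();

    // Directory sizes reported by the filesystem are not meaningful here, so a size mismatch on
    // a directory is not treated as a change.
    let ignored_dir_size_mismatch = fs.is_dir && fs.size_in_bytes != db.size_in_bytes;

    changed && !ignored_dir_size_mismatch
}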
- paths_buffer.clear(); - - // Marking with a loop label here in case of rejection or errors, to continue with next entry - 'entries: loop { - let entry = match read_dir.next_entry().await { - Ok(Some(entry)) => entry, - Ok(None) => break, - Err(e) => { - errors.push(FileIOError::from((path.clone(), e)).into()); - continue; - } - }; - - // Accept by children has three states, - // None if we don't now yet or if this check doesn't apply - // Some(true) if this check applies and it passes - // Some(false) if this check applies and it was rejected - // and we pass the current parent state to its children - let mut accept_by_children_dir = *parent_dir_accepted_by_its_children; - - let current_path = entry.path(); - - trace!( - "Current filesystem path: {}, accept_by_children_dir: {:#?}", - current_path.display(), - accept_by_children_dir - ); - - let Ok(rules_per_kind) = IndexerRule::apply_all(&rules, ¤t_path) - .await - .map_err(|e| errors.push(e.into())) - else { - continue 'entries; - }; - - if rules_per_kind - .get(&RuleKind::RejectFilesByGlob) - .map_or(false, |reject_results| { - reject_results.iter().any(|reject| !reject) - }) { - trace!( - "Path {} rejected by `RuleKind::RejectFilesByGlob`", - current_path.display() - ); - continue 'entries; - } - - if let Some(f) = rules_per_kind.get(&RuleKind::IgnoredByGit) { - if f.iter().any(|s| !s) { - continue 'entries; - } - } - - let Ok(metadata) = entry - .metadata() - .await - .map_err(|e| errors.push(FileIOError::from((¤t_path, e)).into())) - else { - continue 'entries; - }; - - // TODO: Hard ignoring symlinks for now, but this should be configurable - if metadata.is_symlink() { - continue 'entries; - } - - let is_dir = metadata.is_dir(); - - if is_dir { - // If it is a directory, first we check if we must reject it and its children entirely - if rules_per_kind - .get(&RuleKind::RejectIfChildrenDirectoriesArePresent) - .map_or(false, |reject_results| { - reject_results.iter().any(|reject| !reject) - }) { - trace!( - "Path {} rejected by rule `RuleKind::RejectIfChildrenDirectoriesArePresent`", - current_path.display(), - ); - continue 'entries; - } - - // Then we check if we must accept it and its children - if let Some(accept_by_children_rules) = - rules_per_kind.get(&RuleKind::AcceptIfChildrenDirectoriesArePresent) - { - if accept_by_children_rules.iter().any(|accept| *accept) { - accept_by_children_dir = Some(true); - } - - // If it wasn't accepted then we mark as rejected - if accept_by_children_dir.is_none() { - trace!( - "Path {} rejected because it didn't passed in any AcceptIfChildrenDirectoriesArePresent rule", - current_path.display() - ); - accept_by_children_dir = Some(false); - } - } - - // Then we check if there's a git ignore rule for it - if let Some(accept) = rules_per_kind.get(&RuleKind::IgnoredByGit) { - if !accept.iter().any(|&r| r) { - trace!(dir=?current_path, "ignoring files because of git ignore"); - continue 'entries; - } - } - - // Then we mark this directory the be walked in too - if let Some(ref mut to_walk) = maybe_to_walk { - to_walk.push_back(ToWalkEntry { - path: current_path.clone(), - parent_dir_accepted_by_its_children: accept_by_children_dir, - maybe_parent: Some(path.clone()), - }); - } - } - - if rules_per_kind - .get(&RuleKind::AcceptFilesByGlob) - .map_or(false, |accept_rules| { - accept_rules.iter().all(|accept| !accept) - }) { - trace!( - "Path {} reject because it didn't passed in any AcceptFilesByGlob rules", - current_path.display() - ); - continue 'entries; - } - - if 
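// A minimal sketch of the three-state `accept_by_children_dir` handling above: `None` means the
// "accept if children directories are present" rules either don't apply or haven't decided yet,
// `Some(true)` means at least one such rule accepted the directory, and `Some(false)` means the
// rules applied but none accepted it; the resolved state is then inherited by the directory's
// children on the walk queue.
fn resolve_accept_by_children(
    inherited_from_parent: Option<bool>,
    rule_results: Option<&[bool]>,
) -> Option<bool> {
    let mut state = inherited_from_parent;

    if let Some(results) = rule_results {
        if results.iter().any(|&accepted| accepted) {
            state = Some(true);
        }

        // The rules applied but nothing accepted this directory (and no ancestor did either),
        // so the subtree is marked as rejected.
        if state.is_none() {
            state = Some(false);
        }
    }

    state
}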
accept_by_children_dir.unwrap_or(true) { - let Ok(iso_file_path) = - iso_file_path_factory(¤t_path, is_dir).map_err(|e| errors.push(e)) - else { - continue 'entries; - }; - - let Ok(metadata) = FilePathMetadata::from_path(¤t_path, &metadata) - .map_err(|e| errors.push(e.into())) - else { - continue; - }; - - paths_buffer.insert(WalkingEntry { - iso_file_path, - maybe_metadata: Some(metadata), - }); - - // If the ancestors directories wasn't indexed before, now we do - for ancestor in current_path - .ancestors() - .skip(1) // Skip the current directory as it was already indexed - .take_while(|&ancestor| ancestor != current_dir) - { - let Ok(iso_file_path) = - iso_file_path_factory(ancestor, true).map_err(|e| errors.push(e)) - else { - // Checking the next ancestor, as this one we got an error - continue; - }; - - let mut ancestor_iso_walking_entry = WalkingEntry { - iso_file_path, - maybe_metadata: None, - }; - trace!("Indexing ancestor {}", ancestor.display()); - if !indexed_paths.contains(&ancestor_iso_walking_entry) { - let Ok(metadata) = fs::metadata(ancestor) - .await - .map_err(|e| errors.push(FileIOError::from((&ancestor, e)).into())) - else { - // Checking the next ancestor, as this one we got an error - continue; - }; - - let Ok(metadata) = FilePathMetadata::from_path(ancestor, &metadata) - .map_err(|e| errors.push(e.into())) - else { - continue; - }; - - ancestor_iso_walking_entry.maybe_metadata = Some(metadata); - - paths_buffer.insert(ancestor_iso_walking_entry); - } else { - // If indexed_paths contains the current ancestors, then it will contain - // also all if its ancestors too, so we can stop here - break; - } - } - } - } - - // We continue the function even if we fail to fetch `file_path`s to remove, - // the DB will have old `file_path`s but at least this is better than - // don't adding the newly indexed paths - let to_remove = to_remove_db_fetcher( - iso_file_path_to_walk, - paths_buffer - .iter() - .map(|entry| &entry.iso_file_path) - .map(Into::into) - .collect(), - ) - .await - .unwrap_or_else(|e| { - errors.push(e); - vec![] - }); - - let mut to_walk_entry_size = 0; - - // Just merging the `found_paths` with `indexed_paths` here in the end to avoid possibly - // multiple rehashes during function execution - indexed_paths.extend(paths_buffer.drain().map(|walking_entry| { - if let Some(metadata) = &walking_entry.maybe_metadata { - to_walk_entry_size += metadata.size_in_bytes; - } - walking_entry - })); - - (to_walk_entry_size, to_remove) -} - -#[cfg(test)] -#[allow(clippy::unwrap_used, clippy::panic)] -mod tests { - use super::*; - use chrono::Utc; - use globset::{Glob, GlobSetBuilder}; - use sd_core_indexer_rules::RulePerKind; - use tempfile::{tempdir, TempDir}; - // use tracing_test::traced_test; - - impl PartialEq for WalkedEntry { - fn eq(&self, other: &Self) -> bool { - self.iso_file_path == other.iso_file_path - } - } - - impl Eq for WalkedEntry {} - - impl Hash for WalkedEntry { - fn hash(&self, state: &mut H) { - self.iso_file_path.hash(state); - } - } - - fn new_indexer_rule( - name: impl Into, - default: bool, - rules: Vec, - ) -> IndexerRule { - IndexerRule { - id: None, - name: name.into(), - default, - rules, - date_created: Utc::now(), - date_modified: Utc::now(), - } - } - - async fn prepare_location() -> TempDir { - let root = tempdir().unwrap(); - let root_path = root.path(); - let rust_project = root_path.join("rust_project"); - let inner_project = root_path.join("inner"); - let node_project = inner_project.join("node_project"); - let photos = 
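// A minimal sketch of the ancestor back-fill above: when an accepted entry lives several levels
// below the directory being walked, every ancestor up to (but not including) that directory is
// indexed as well, and the loop stops as soon as it reaches an ancestor that is already indexed,
// because that ancestor's own ancestors must then already be present too. A plain HashSet of
// paths stands in for the real `WalkingEntry` set.
use std::{
    collections::HashSet,
    path::{Path, PathBuf},
};

fn backfill_ancestors(indexed: &mut HashSet<PathBuf>, entry_path: &Path, walk_root: &Path) {
    for ancestor in entry_path
        .ancestors()
        .skip(1) // skip the entry itself, it was just indexed
        .take_while(|&ancestor| ancestor != walk_root)
    {
        if !indexed.insert(ancestor.to_path_buf()) {
            // Already indexed, so everything above it is as well.
            break;
        }
    }
}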
root_path.join("photos"); - - fs::create_dir(&rust_project).await.unwrap(); - fs::create_dir(&inner_project).await.unwrap(); - fs::create_dir(&node_project).await.unwrap(); - fs::create_dir(&photos).await.unwrap(); - - // Making rust and node projects a git repository - fs::create_dir(rust_project.join(".git")).await.unwrap(); - fs::create_dir(node_project.join(".git")).await.unwrap(); - - // Populating rust project - fs::File::create(rust_project.join("Cargo.toml")) - .await - .unwrap(); - let rust_src_dir = rust_project.join("src"); - fs::create_dir(&rust_src_dir).await.unwrap(); - fs::File::create(rust_src_dir.join("main.rs")) - .await - .unwrap(); - let rust_target_dir = rust_project.join("target"); - fs::create_dir(&rust_target_dir).await.unwrap(); - let rust_build_dir = rust_target_dir.join("debug"); - fs::create_dir(&rust_build_dir).await.unwrap(); - fs::File::create(rust_build_dir.join("main")).await.unwrap(); - - // Populating node project - fs::File::create(node_project.join("package.json")) - .await - .unwrap(); - let node_src_dir = node_project.join("src"); - fs::create_dir(&node_src_dir).await.unwrap(); - fs::File::create(node_src_dir.join("App.tsx")) - .await - .unwrap(); - let node_modules = node_project.join("node_modules"); - fs::create_dir(&node_modules).await.unwrap(); - let node_modules_dep = node_modules.join("react"); - fs::create_dir(&node_modules_dep).await.unwrap(); - fs::File::create(node_modules_dep.join("package.json")) - .await - .unwrap(); - - // Photos directory - for photo in ["photo1.png", "photo2.jpg", "photo3.jpeg", "text.txt"].iter() { - fs::File::create(photos.join(photo)).await.unwrap(); - } - - root - } - - #[tokio::test] - async fn test_walk_without_rules() { - let root = prepare_location().await; - let root_path = root.path(); - - let metadata = FilePathMetadata { - inode: 0, - size_in_bytes: 0, - created_at: Utc::now(), - modified_at: Utc::now(), - hidden: false, - }; - - let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); - let maybe_object_id = None; - - #[rustfmt::skip] - let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, - 
WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/text.txt"), false), metadata }, - ] - .into_iter() - .collect::>(); - - let walk_result = walk( - root_path.to_path_buf(), - root_path.to_path_buf(), - &mut [], - |_, _| {}, - |_| async { Ok(vec![]) }, - |_, _| async { Ok(vec![]) }, - |path, is_dir| { - IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) - }, - 420, - ) - .await - .unwrap(); - - if !walk_result.errors.is_empty() { - panic!("errors: {:#?}", walk_result.errors); - } - - let actual = walk_result.walked.collect::>(); - - if actual != expected { - panic!("difference: {:#?}", expected.difference(&actual)); - } - } - - #[tokio::test] - // #[traced_test] - async fn test_only_photos() { - let root = prepare_location().await; - let root_path = root.path(); - - let metadata = FilePathMetadata { - inode: 0, - size_in_bytes: 0, - created_at: Utc::now(), - modified_at: Utc::now(), - hidden: false, - }; - - let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); - let maybe_object_id = None; - - #[rustfmt::skip] - let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo1.png"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo2.jpg"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("photos/photo3.jpeg"), false), metadata }, - ] - .into_iter() - .collect::>(); - - let mut only_photos_rule = vec![new_indexer_rule( - "only photos".to_string(), - false, - vec![RulePerKind::AcceptFilesByGlob( - vec![], - GlobSetBuilder::new() - .add(Glob::new("{*.png,*.jpg,*.jpeg}").unwrap()) - .build() - .unwrap(), - )], - )]; - - let walk_result = walk( - root_path.to_path_buf(), - root_path.to_path_buf(), - &mut only_photos_rule, - |_, _| {}, - |_| async { Ok(vec![]) }, - |_, _| async { Ok(vec![]) }, - |path, is_dir| { - IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) - }, - 420, - ) - .await - .unwrap(); - - if !walk_result.errors.is_empty() { - panic!("errors: {:#?}", walk_result.errors); - } - - let actual = walk_result.walked.collect::>(); - - if actual != expected { - panic!("difference: {:#?}", expected.difference(&actual)); - } - } 
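// A minimal sketch of the glob matching that the "only photos" rule in the test above relies on:
// a `globset::GlobSet` is built from a single brace-expanded pattern and then used to test
// candidate names, which is the shape of the `AcceptFilesByGlob` rule's pattern.
use globset::{Glob, GlobSetBuilder};

fn photo_matcher() -> globset::GlobSet {
    GlobSetBuilder::new()
        .add(Glob::new("{*.png,*.jpg,*.jpeg}").expect("valid glob"))
        .build()
        .expect("glob set builds")
}

fn main() {
    let photos = photo_matcher();
    assert!(photos.is_match("photo1.png"));
    assert!(!photos.is_match("text.txt"));
}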
- - #[tokio::test] - // #[traced_test] - async fn test_git_repos() { - let root = prepare_location().await; - let root_path = root.path(); - - let metadata = FilePathMetadata { - inode: 0, - size_in_bytes: 0, - created_at: Utc::now(), - modified_at: Utc::now(), - hidden: false, - }; - - let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); - let maybe_object_id = None; - - #[rustfmt::skip] - let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/target/debug/main"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/node_modules/react/package.json"), false), metadata }, - ] - .into_iter() - .collect::>(); - - let mut git_repos = vec![new_indexer_rule( - "git repos".to_string(), - false, - vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent( - [".git".to_string()].into_iter().collect(), - )], - )]; - - let walk_result = walk( - root_path.to_path_buf(), - root_path.to_path_buf(), - &mut git_repos, - |_, _| {}, - |_| async { Ok(vec![]) }, - |_, _| async { Ok(vec![]) }, - |path, is_dir| { - IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) - }, - 420, - ) - .await - .unwrap(); - - if !walk_result.errors.is_empty() { - panic!("errors: {:#?}", walk_result.errors); - } - - let actual = walk_result.walked.collect::>(); - - if actual != expected { - let not_found = expected.difference(&actual); - let not_expected = actual.difference(&expected); - panic!("difference:\nexpected, but not found: {not_found:#?}\nfound, but not expected:{not_expected:#?}"); - } - } - - #[tokio::test] - // #[traced_test] - async fn 
git_repos_without_deps_or_build_dirs() { - let root = prepare_location().await; - let root_path = root.path(); - - let metadata = FilePathMetadata { - inode: 0, - size_in_bytes: 0, - created_at: Utc::now(), - modified_at: Utc::now(), - hidden: false, - }; - - let f = |path, is_dir| IsolatedFilePathData::new(0, root_path, path, is_dir).unwrap(); - let pub_id = Uuid::new_v4(); - let maybe_object_id = None; - - #[rustfmt::skip] - let expected = [ - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/Cargo.toml"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("rust_project/src/main.rs"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/.git"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/package.json"), false), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src"), true), metadata }, - WalkedEntry { pub_id, maybe_object_id, iso_file_path: f(root_path.join("inner/node_project/src/App.tsx"), false), metadata }, - ] - .into_iter() - .collect::>(); - - let mut git_repos_no_deps_no_build_dirs = vec![ - new_indexer_rule( - "git repos".to_string(), - false, - vec![RulePerKind::AcceptIfChildrenDirectoriesArePresent( - [".git".to_string()].into_iter().collect(), - )], - ), - new_indexer_rule( - "reject node_modules".to_string(), - false, - vec![RulePerKind::RejectFilesByGlob( - vec![], - GlobSetBuilder::new() - .add(Glob::new("{**/node_modules/*,**/node_modules}").unwrap()) - .build() - .unwrap(), - )], - ), - new_indexer_rule( - "reject rust build dir".to_string(), - false, - vec![RulePerKind::RejectFilesByGlob( - vec![], - GlobSetBuilder::new() - .add(Glob::new("{**/target/*,**/target}").unwrap()) - .build() - .unwrap(), - )], - ), - ]; - - let walk_result = walk( - root_path.to_path_buf(), - root_path.to_path_buf(), - &mut git_repos_no_deps_no_build_dirs, - |_, _| {}, - |_| async { Ok(vec![]) }, - |_, _| async { Ok(vec![]) }, - |path, is_dir| { - IsolatedFilePathData::new(0, root_path, path, is_dir).map_err(Into::into) - }, - 420, - ) - .await - .unwrap(); - - if !walk_result.errors.is_empty() { - panic!("errors: {:#?}", walk_result.errors); - } - - let actual = walk_result.walked.collect::>(); - - if actual != expected { - let not_found = expected.difference(&actual); - let not_expected = actual.difference(&expected); - panic!("difference:\nexpected, but not found: {not_found:#?}\nfound, but not expected:{not_expected:#?}"); - } - } -} diff --git a/core/src/location/manager/watcher/android.rs b/core/src/location/manager/watcher/android.rs index e60d6515cf25..b38e6a358294 100644 --- a/core/src/location/manager/watcher/android.rs +++ b/core/src/location/manager/watcher/android.rs @@ -70,7 +70,7 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> { match kind { 
EventKind::Create(CreateKind::File) | EventKind::Modify(ModifyKind::Data(DataChange::Any)) => { - // When we receive a create, modify data or metadata events of the abore kinds + // When we receive a create, modify data or metadata events of the above kinds // we just mark the file to be updated in a near future // each consecutive event of these kinds that we receive for the same file // we just store the path again in the map below, with a new instant @@ -107,7 +107,7 @@ impl<'lib> EventHandler<'lib> for AndroidEventHandler<'lib> { .await?; } EventKind::Modify(ModifyKind::Name(RenameMode::From)) => { - // Just in case we can't garantee that we receive the Rename From event before the + // Just in case we can't guarantee that we receive the Rename From event before the // Rename Both event. Just a safeguard if self.recently_renamed_from.remove(&paths[0]).is_none() { self.rename_from.insert(paths.remove(0), Instant::now()); diff --git a/core/src/location/manager/watcher/ios.rs b/core/src/location/manager/watcher/ios.rs index 63f512e52edf..df5ba28be0f7 100644 --- a/core/src/location/manager/watcher/ios.rs +++ b/core/src/location/manager/watcher/ios.rs @@ -100,7 +100,7 @@ impl<'lib> EventHandler<'lib> for IosEventHandler<'lib> { | EventKind::Modify(ModifyKind::Metadata( MetadataKind::WriteTime | MetadataKind::Extended, )) => { - // When we receive a create, modify data or metadata events of the abore kinds + // When we receive a create, modify data or metadata events of the above kinds // we just mark the file to be updated in a near future // each consecutive event of these kinds that we receive for the same file // we just store the path again in the map below, with a new instant diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index db30596f9872..50b2380a37ff 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -6,7 +6,7 @@ use crate::{ indexer::reverse_update_directories_sizes, location_with_indexer_rules, manager::LocationManagerError, scan_location_sub_path, update_location_size, }, - object::{media::get_indexed_thumbnail_path, validation::hash::file_checksum}, + object::validation::hash::file_checksum, Node, }; @@ -23,7 +23,7 @@ use sd_core_heavy_lifting::{ ThumbnailKind, }, }; -use sd_core_prisma_helpers::{file_path_with_object, ObjectPubId}; +use sd_core_prisma_helpers::{file_path_with_object, CasId, ObjectPubId}; use sd_file_ext::{ extensions::{AudioExtension, ImageExtension, VideoExtension}, @@ -250,7 +250,7 @@ async fn inner_create_file( let existing_object = db .object() .find_first(vec![object::file_paths::some(vec![ - file_path::cas_id::equals(cas_id.clone()), + file_path::cas_id::equals(cas_id.clone().map(Into::into)), file_path::pub_id::not(created_file.pub_id.clone()), ])]) .select(object_ids::select()) @@ -494,7 +494,7 @@ async fn inner_update_file( }; let is_hidden = path_is_hidden(full_path, &fs_metadata); - if file_path.cas_id != cas_id { + if file_path.cas_id.as_deref() != cas_id.as_ref().map(CasId::as_str) { let (sync_params, db_params): (Vec<_>, Vec<_>) = { use file_path::*; @@ -665,16 +665,17 @@ async fn inner_update_file( .await?; } - if let Some(old_cas_id) = &file_path.cas_id { + if let Some(old_cas_id) = file_path.cas_id.as_ref().map(CasId::from) { // if this file had a thumbnail previously, we update it to match the new content - if library.thumbnail_exists(node, old_cas_id).await? { + if library.thumbnail_exists(node, &old_cas_id).await? 
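// A minimal sketch of the new optional cas_id comparison above: the database row still stores
// the id as an Option<String> while the freshly computed value is an Option<CasId> newtype, so
// both sides are borrowed down to Option<&str> before comparing. `CasId` here is a simplified
// stand-in for the helper type re-exported from `sd-core-prisma-helpers`.
struct CasId(String);

impl CasId {
    fn as_str(&self) -> &str {
        &self.0
    }
}

fn cas_id_changed(db_cas_id: &Option<String>, new_cas_id: &Option<CasId>) -> bool {
    db_cas_id.as_deref() != new_cas_id.as_ref().map(CasId::as_str)
}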
{ if let Some(ext) = file_path.extension.clone() { // Running in a detached task as thumbnail generation can take a while and we don't want to block the watcher if let Some(cas_id) = cas_id { let node = Arc::clone(node); let path = full_path.to_path_buf(); let library_id = library.id; - let old_cas_id = old_cas_id.clone(); + let old_cas_id = old_cas_id.to_owned(); + spawn(async move { let thumbnails_directory = get_thumbnails_directory(node.config.data_directory()); @@ -696,8 +697,8 @@ async fn inner_update_file( // so we overwrote our previous thumbnail, so we can't remove it if !was_overwritten { // remove the old thumbnail as we're generating a new one - let thumb_path = - get_indexed_thumbnail_path(&node, &old_cas_id, library_id); + let thumb_path = ThumbnailKind::Indexed(library_id) + .compute_path(node.config.data_directory(), &old_cas_id); if let Err(e) = fs::remove_file(&thumb_path).await { error!( "Failed to remove old thumbnail: {:#?}", diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index 688197aa90af..213f0026b979 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -2,7 +2,6 @@ use crate::{ context::NodeContext, invalidate_query, library::Library, - // old_job::{JobBuilder, JobError, JobManagerError}, Node, }; @@ -16,7 +15,7 @@ use sd_core_heavy_lifting::{ media_processor::{self, job::MediaProcessor}, JobEnqueuer, JobId, JobSystemError, }; -use sd_core_prisma_helpers::location_with_indexer_rules; +use sd_core_prisma_helpers::{location_with_indexer_rules, CasId}; use sd_prisma::{ prisma::{file_path, indexer_rules_in_location, location, PrismaClient}, @@ -46,13 +45,11 @@ use tracing::{debug, error, info, warn}; use uuid::Uuid; mod error; -// pub mod indexer; mod manager; pub mod metadata; pub mod non_indexed; pub use error::LocationError; -// use indexer::OldIndexerJobInit; pub use manager::{LocationManagerError, Locations}; use metadata::SpacedriveLocationMetadataFile; @@ -604,18 +601,6 @@ pub async fn light_scan_location( let location_base_data = location::Data::from(&location); - // indexer::old_shallow(&location, &sub_path, &node, &library).await?; - // old_file_identifier::old_shallow(&location_base_data, &sub_path, &library).await?; - // old_media_processor::old_shallow( - // &location_base_data, - // &sub_path, - // &library, - // #[cfg(feature = "ai")] - // false, - // &node, - // ) - // .await?; - let dispatcher = node.task_system.get_dispatcher(); let ctx = NodeContext { node, library }; @@ -1093,7 +1078,7 @@ pub async fn create_file_path( extension, .. 
}: IsolatedFilePathDataParts<'_>, - cas_id: Option, + cas_id: Option>, metadata: sd_core_file_path_helper::FilePathMetadata, ) -> Result { use sd_utils::db::inode_to_db; @@ -1125,7 +1110,10 @@ pub async fn create_file_path( ), location::connect(prisma::location::id::equals(location.id)), ), - ((cas_id::NAME, msgpack!(cas_id)), cas_id::set(cas_id)), + ( + (cas_id::NAME, msgpack!(cas_id)), + cas_id::set(cas_id.map(Into::into)), + ), ( (materialized_path::NAME, msgpack!(materialized_path)), materialized_path::set(Some(materialized_path.into())), diff --git a/core/src/location/non_indexed.rs b/core/src/location/non_indexed.rs index 5cd432e5c83d..1eba078593bf 100644 --- a/core/src/location/non_indexed.rs +++ b/core/src/location/non_indexed.rs @@ -1,18 +1,12 @@ -use crate::{ - api::locations::ExplorerItem, - context::NodeContext, - library::Library, - object::{ - cas::generate_cas_id, - // media::old_thumbnail::{get_ephemeral_thumb_key, BatchToProcess, GenerateThumbnailArgs}, - }, - Node, -}; +use crate::{api::locations::ExplorerItem, context::NodeContext, library::Library, Node}; use sd_core_file_path_helper::{path_is_hidden, MetadataExt}; -use sd_core_heavy_lifting::media_processor::{ - self, get_thumbnails_directory, thumbnailer::NewThumbnailReporter, GenerateThumbnailArgs, - NewThumbnailsReporter, ThumbKey, +use sd_core_heavy_lifting::{ + file_identifier::generate_cas_id, + media_processor::{ + self, get_thumbnails_directory, thumbnailer::NewThumbnailReporter, GenerateThumbnailArgs, + NewThumbnailsReporter, ThumbKey, + }, }; use sd_core_indexer_rules::{ seed::{NO_HIDDEN, NO_SYSTEM_FILES}, @@ -227,12 +221,12 @@ pub async fn walk( )); } - ( - Some(ThumbKey::new_ephemeral(&cas_id)), - node.ephemeral_thumbnail_exists(&cas_id) - .await - .map_err(NonIndexedLocationError::from)?, - ) + let thumb_exists = node + .ephemeral_thumbnail_exists(&cas_id) + .await + .map_err(NonIndexedLocationError::from)?; + + (Some(ThumbKey::new_ephemeral(cas_id)), thumb_exists) } else { (None, false) } diff --git a/core/src/node/config.rs b/core/src/node/config.rs index eb3bd102bf2c..8892756d6775 100644 --- a/core/src/node/config.rs +++ b/core/src/node/config.rs @@ -155,6 +155,7 @@ mod identity_serde { #[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq, Eq, Type)] pub struct NodePreferences { // pub thumbnailer: ThumbnailerPreferences, + // TODO(fogodev): introduce preferences to choose how many worker the task system should have } #[derive( @@ -363,11 +364,6 @@ impl Manager { self.config.read().await.clone() } - /// get a node config preferences watcher receiver - pub(crate) fn preferences_watcher(&self) -> watch::Receiver { - self.preferences_watcher_tx.subscribe() - } - /// data_directory returns the path to the directory storing the configuration data. 
pub(crate) fn data_directory(&self) -> PathBuf { self.data_directory_path.clone() diff --git a/core/src/object/cas.rs b/core/src/object/cas.rs deleted file mode 100644 index 43a1be0f40ed..000000000000 --- a/core/src/object/cas.rs +++ /dev/null @@ -1,62 +0,0 @@ -use std::path::Path; - -use blake3::Hasher; -use static_assertions::const_assert; -use tokio::{ - fs::{self, File}, - io::{self, AsyncReadExt, AsyncSeekExt, SeekFrom}, -}; - -const SAMPLE_COUNT: u64 = 4; -const SAMPLE_SIZE: u64 = 1024 * 10; -const HEADER_OR_FOOTER_SIZE: u64 = 1024 * 8; - -// minimum file size of 100KiB, to avoid sample hashing for small files as they can be smaller than the total sample size -const MINIMUM_FILE_SIZE: u64 = 1024 * 100; - -// Asserting that nobody messed up our consts -const_assert!((HEADER_OR_FOOTER_SIZE * 2 + SAMPLE_COUNT * SAMPLE_SIZE) < MINIMUM_FILE_SIZE); - -// Asserting that the sample size is larger than header/footer size, as the same buffer is used for both -const_assert!(SAMPLE_SIZE > HEADER_OR_FOOTER_SIZE); - -pub async fn generate_cas_id(path: impl AsRef, size: u64) -> Result { - let mut hasher = Hasher::new(); - hasher.update(&size.to_le_bytes()); - - if size <= MINIMUM_FILE_SIZE { - // For small files, we hash the whole file - hasher.update(&fs::read(path).await?); - } else { - let mut file = File::open(path).await?; - let mut buf = vec![0; SAMPLE_SIZE as usize].into_boxed_slice(); - - // Hashing the header - let mut current_pos = file - .read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize]) - .await? as u64; - hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]); - - // Sample hashing the inner content of the file - let seek_jump = (size - HEADER_OR_FOOTER_SIZE * 2) / SAMPLE_COUNT; - loop { - file.read_exact(&mut buf).await?; - hasher.update(&buf); - - if current_pos >= (HEADER_OR_FOOTER_SIZE + seek_jump * (SAMPLE_COUNT - 1)) { - break; - } - - current_pos = file.seek(SeekFrom::Start(current_pos + seek_jump)).await?; - } - - // Hashing the footer - file.seek(SeekFrom::End(-(HEADER_OR_FOOTER_SIZE as i64))) - .await?; - file.read_exact(&mut buf[..HEADER_OR_FOOTER_SIZE as usize]) - .await?; - hasher.update(&buf[..HEADER_OR_FOOTER_SIZE as usize]); - } - - Ok(hasher.finalize().to_hex()[..16].to_string()) -} diff --git a/core/src/object/media/exif_metadata_extractor.rs b/core/src/object/media/exif_metadata_extractor.rs deleted file mode 100644 index 96a815c8001d..000000000000 --- a/core/src/object/media/exif_metadata_extractor.rs +++ /dev/null @@ -1,164 +0,0 @@ -use crate::old_job::JobRunErrors; - -use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_media_processor; - -use sd_file_ext::extensions::{Extension, ImageExtension, ALL_IMAGE_EXTENSIONS}; -use sd_media_metadata::ExifMetadata; -use sd_prisma::prisma::{exif_data, location, PrismaClient}; - -use std::{collections::HashSet, path::Path}; - -use futures_concurrency::future::Join; -use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tracing::error; - -use super::exif_data_image_to_query; - -#[derive(Error, Debug)] -pub enum ExifDataError { - // Internal errors - #[error("database error: {0}")] - Database(#[from] prisma_client_rust::QueryError), - #[error(transparent)] - MediaData(#[from] sd_media_metadata::Error), -} - -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct OldExifDataExtractorMetadata { - pub extracted: u32, - pub skipped: u32, -} - -pub(super) static FILTERED_IMAGE_EXTENSIONS: Lazy> = Lazy::new(|| { - ALL_IMAGE_EXTENSIONS - 
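// A minimal sketch of the sampling layout that the const_asserts in the deleted `generate_cas_id`
// above guard: files at or below MINIMUM_FILE_SIZE are hashed whole, while larger files
// contribute an 8 KiB header, SAMPLE_COUNT samples of SAMPLE_SIZE bytes spaced `seek_jump` bytes
// apart, and an 8 KiB footer, so the sampled bytes (2 * 8 KiB + 4 * 10 KiB = 56 KiB) always fit
// inside the 100 KiB minimum. `seek_jump` mirrors the expression used in the deleted function.
const SAMPLE_COUNT: u64 = 4;
const SAMPLE_SIZE: u64 = 1024 * 10;
const HEADER_OR_FOOTER_SIZE: u64 = 1024 * 8;
const MINIMUM_FILE_SIZE: u64 = 1024 * 100;

fn seek_jump(file_size: u64) -> u64 {
    (file_size - HEADER_OR_FOOTER_SIZE * 2) / SAMPLE_COUNT
}

fn main() {
    assert!(HEADER_OR_FOOTER_SIZE * 2 + SAMPLE_COUNT * SAMPLE_SIZE < MINIMUM_FILE_SIZE);
    // For a 1 MiB file, consecutive samples start 258_048 bytes (about 252 KiB) apart.
    assert_eq!(seek_jump(1024 * 1024), 258_048);
}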
.iter() - .cloned() - .filter(can_extract_exif_data_for_image) - .map(Extension::Image) - .collect() -}); - -pub const fn can_extract_exif_data_for_image(image_extension: &ImageExtension) -> bool { - use ImageExtension::*; - matches!( - image_extension, - Tiff | Dng | Jpeg | Jpg | Heif | Heifs | Heic | Avif | Avcs | Avci | Hif | Png | Webp - ) -} - -pub async fn extract_exif_data( - path: impl AsRef + Send, -) -> Result, ExifDataError> { - ExifMetadata::from_path(path).await.map_err(Into::into) -} - -pub async fn process( - files_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: impl AsRef, - db: &PrismaClient, - ctx_update_fn: &impl Fn(usize), -) -> Result<(OldExifDataExtractorMetadata, JobRunErrors), ExifDataError> { - let mut run_metadata = OldExifDataExtractorMetadata::default(); - if files_paths.is_empty() { - return Ok((run_metadata, JobRunErrors::default())); - } - - let location_path = location_path.as_ref(); - - let objects_already_with_exif_data = db - .exif_data() - .find_many(vec![exif_data::object_id::in_vec( - files_paths - .iter() - .filter_map(|file_path| file_path.object_id) - .collect(), - )]) - .select(exif_data::select!({ object_id })) - .exec() - .await?; - - if files_paths.len() == objects_already_with_exif_data.len() { - // All files already have media data, skipping - run_metadata.skipped = files_paths.len() as u32; - return Ok((run_metadata, JobRunErrors::default())); - } - - let objects_already_with_exif_data = objects_already_with_exif_data - .into_iter() - .map(|exif_data| exif_data.object_id) - .collect::>(); - - run_metadata.skipped = objects_already_with_exif_data.len() as u32; - - let (exif_datas, errors) = { - let maybe_exif_data = files_paths - .iter() - .enumerate() - .filter_map(|(idx, file_path)| { - file_path.object_id.and_then(|object_id| { - (!objects_already_with_exif_data.contains(&object_id)) - .then_some((idx, file_path, object_id)) - }) - }) - .filter_map(|(idx, file_path, object_id)| { - IsolatedFilePathData::try_from((location_id, file_path)) - .map_err(|e| error!("{e:#?}")) - .ok() - .map(|iso_file_path| (idx, location_path.join(iso_file_path), object_id)) - }) - .map(|(idx, path, object_id)| async move { - let res = extract_exif_data(&path).await; - ctx_update_fn(idx + 1); - (res, path, object_id) - }) - .collect::>() - .join() - .await; - - let total_exif_data = maybe_exif_data.len(); - - maybe_exif_data.into_iter().fold( - // In the good case, all exif data were extracted - (Vec::with_capacity(total_exif_data), Vec::new()), - |(mut exif_datas, mut errors), (maybe_exif_data, path, object_id)| { - match maybe_exif_data { - Ok(Some(exif_data)) => exif_datas.push((exif_data, object_id)), - Ok(None) => { - // No exif data on path, skipping - run_metadata.skipped += 1; - } - Err(e) => errors.push((e, path)), - } - (exif_datas, errors) - }, - ) - }; - - let created = db - .exif_data() - .create_many( - exif_datas - .into_iter() - .map(|(exif_data, object_id)| exif_data_image_to_query(exif_data, object_id)) - .collect(), - ) - .skip_duplicates() - .exec() - .await?; - - run_metadata.extracted = created as u32; - run_metadata.skipped += errors.len() as u32; - - Ok(( - run_metadata, - errors - .into_iter() - .map(|(e, path)| format!("Couldn't process file: \"{}\"; Error: {e}", path.display())) - .collect::>() - .into(), - )) -} diff --git a/core/src/object/media/ffmpeg_metadata_extractor.rs b/core/src/object/media/ffmpeg_metadata_extractor.rs deleted file mode 100644 index 754098ecc617..000000000000 
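// A minimal sketch of the "skip what's already extracted" filter used by the deleted exif
// `process` function above: object ids that already have an `exif_data` row are collected into a
// HashSet, only file paths whose object id is missing from that set are handed to the extractor,
// and everything else is counted as skipped. Plain i32 ids stand in for the prisma types.
use std::collections::HashSet;

fn objects_to_process(candidates: &[i32], already_extracted: &[i32]) -> (Vec<i32>, u32) {
    let done: HashSet<i32> = already_extracted.iter().copied().collect();

    let to_process: Vec<i32> = candidates
        .iter()
        .copied()
        .filter(|object_id| !done.contains(object_id))
        .collect();

    let skipped = (candidates.len() - to_process.len()) as u32;

    (to_process, skipped)
}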
--- a/core/src/object/media/ffmpeg_metadata_extractor.rs +++ /dev/null @@ -1,660 +0,0 @@ -use crate::old_job::JobRunErrors; - -use prisma_client_rust::QueryError; -use sd_core_file_path_helper::IsolatedFilePathData; -use sd_core_prisma_helpers::file_path_for_media_processor; - -use sd_file_ext::extensions::{ - AudioExtension, Extension, VideoExtension, ALL_AUDIO_EXTENSIONS, ALL_VIDEO_EXTENSIONS, -}; -use sd_media_metadata::{ - ffmpeg::{ - audio_props::AudioProps, - chapter::Chapter, - codec::{Codec, Props}, - metadata::Metadata, - program::Program, - stream::Stream, - video_props::VideoProps, - }, - FFmpegMetadata, -}; -use sd_prisma::prisma::{ - ffmpeg_data, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_codec, - ffmpeg_media_program, ffmpeg_media_stream, ffmpeg_media_video_props, location, object, - PrismaClient, -}; -use sd_utils::db::ffmpeg_data_field_to_db; - -use std::{ - collections::{HashMap, HashSet}, - path::Path, -}; - -use futures_concurrency::future::{Join, TryJoin}; -use once_cell::sync::Lazy; -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tracing::error; - -#[derive(Error, Debug)] -pub enum FFmpegDataError { - // Internal errors - #[error("database error: {0}")] - Database(#[from] prisma_client_rust::QueryError), - #[error(transparent)] - MediaData(#[from] sd_media_metadata::Error), -} - -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct OldFFmpegDataExtractorMetadata { - pub extracted: u32, - pub skipped: u32, -} - -pub(super) static FILTERED_AUDIO_AND_VIDEO_EXTENSIONS: Lazy> = Lazy::new(|| { - ALL_AUDIO_EXTENSIONS - .iter() - .copied() - .filter(can_extract_ffmpeg_data_for_audio) - .map(Extension::Audio) - .chain( - ALL_VIDEO_EXTENSIONS - .iter() - .copied() - .filter(can_extract_ffmpeg_data_for_video) - .map(Extension::Video), - ) - .collect() -}); - -pub const fn can_extract_ffmpeg_data_for_audio(audio_extension: &AudioExtension) -> bool { - use AudioExtension::*; - // TODO: Remove from here any extension which ffmpeg can't extract metadata from - matches!( - audio_extension, - Mp3 | Mp2 - | M4a | Wav | Aiff - | Aif | Flac | Ogg - | Oga | Opus | Wma - | Amr | Aac | Wv - | Voc | Tta | Loas - | Caf | Aptx | Adts - | Ast | Mid - ) -} - -pub const fn can_extract_ffmpeg_data_for_video(video_extension: &VideoExtension) -> bool { - use VideoExtension::*; - // TODO: Remove from here any extension which ffmpeg can't extract metadata from - matches!( - video_extension, - Avi | Avifs - | Qt | Mov | Swf - | Mjpeg | Ts | Mts - | Mpeg | Mxf | M2v - | Mpg | Mpe | M2ts - | Flv | Wm | _3gp - | M4v | Wmv | Asf - | Mp4 | Webm | Mkv - | Vob | Ogv | Wtv - | Hevc | F4v - ) -} - -pub async fn extract_ffmpeg_data( - path: impl AsRef + Send, -) -> Result { - FFmpegMetadata::from_path(path).await.map_err(Into::into) -} - -pub async fn process( - files_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: impl AsRef + Send, - db: &PrismaClient, - ctx_update_fn: &impl Fn(usize), -) -> Result<(OldFFmpegDataExtractorMetadata, JobRunErrors), FFmpegDataError> { - let mut run_metadata = OldFFmpegDataExtractorMetadata::default(); - if files_paths.is_empty() { - return Ok((run_metadata, JobRunErrors::default())); - } - - let location_path = location_path.as_ref(); - - let objects_already_with_ffmpeg_data = db - .ffmpeg_data() - .find_many(vec![ffmpeg_data::object_id::in_vec( - files_paths - .iter() - .filter_map(|file_path| file_path.object_id) - .collect(), - )]) - .select(ffmpeg_data::select!({ object_id })) - 
.exec() - .await?; - - if files_paths.len() == objects_already_with_ffmpeg_data.len() { - // All files already have media data, skipping - run_metadata.skipped = files_paths.len() as u32; - return Ok((run_metadata, JobRunErrors::default())); - } - - let objects_already_with_ffmpeg_data = objects_already_with_ffmpeg_data - .into_iter() - .map(|ffmpeg_data| ffmpeg_data.object_id) - .collect::>(); - - run_metadata.skipped = objects_already_with_ffmpeg_data.len() as u32; - - let mut errors = vec![]; - - let ffmpeg_datas = files_paths - .iter() - .enumerate() - .filter_map(|(idx, file_path)| { - file_path.object_id.and_then(|object_id| { - (!objects_already_with_ffmpeg_data.contains(&object_id)) - .then_some((idx, file_path, object_id)) - }) - }) - .filter_map(|(idx, file_path, object_id)| { - IsolatedFilePathData::try_from((location_id, file_path)) - .map_err(|e| error!("{e:#?}")) - .ok() - .map(|iso_file_path| (idx, location_path.join(iso_file_path), object_id)) - }) - .map(|(idx, path, object_id)| async move { - let res = extract_ffmpeg_data(&path).await; - ctx_update_fn(idx + 1); - (res, path, object_id) - }) - .collect::>() - .join() - .await - .into_iter() - .filter_map(|(res, path, object_id)| { - res.map(|ffmpeg_data| (ffmpeg_data, object_id)) - .map_err(|e| errors.push((e, path))) - .ok() - }) - .collect::>(); - - let created = save_ffmpeg_data(ffmpeg_datas, db).await?; - - run_metadata.extracted = created as u32; - run_metadata.skipped += errors.len() as u32; - - Ok(( - run_metadata, - errors - .into_iter() - .map(|(e, path)| format!("Couldn't process file: \"{}\"; Error: {e}", path.display())) - .collect::>() - .into(), - )) -} - -pub async fn save_ffmpeg_data( - ffmpeg_datas: impl IntoIterator, - db: &PrismaClient, -) -> Result { - ffmpeg_datas - .into_iter() - .map( - move |( - FFmpegMetadata { - formats, - duration, - start_time, - bit_rate, - chapters, - programs, - metadata, - }, - object_id, - )| { - db._transaction() - .with_timeout(30 * 1000) - .run(move |db| async move { - let data_id = create_ffmpeg_data( - formats, bit_rate, duration, start_time, metadata, object_id, &db, - ) - .await?; - - create_ffmpeg_chapters(data_id, chapters, &db).await?; - - let streams = create_ffmpeg_programs(data_id, programs, &db).await?; - - let codecs = create_ffmpeg_streams(data_id, streams, &db).await?; - - let (audio_props, video_props) = - create_ffmpeg_codecs(data_id, codecs, &db).await?; - - ( - create_ffmpeg_audio_props(audio_props, &db), - create_ffmpeg_video_props(video_props, &db), - ) - .try_join() - .await - .map(|_| ()) - }) - }, - ) - .collect::>() - .try_join() - .await - .map(|created| created.len() as u32) -} - -async fn create_ffmpeg_data( - formats: Vec, - bit_rate: (i32, u32), - duration: Option<(i32, u32)>, - start_time: Option<(i32, u32)>, - metadata: Metadata, - object_id: i32, - db: &PrismaClient, -) -> Result { - db.ffmpeg_data() - .create( - formats.join(","), - ffmpeg_data_field_to_db((bit_rate.0 as i64) << 32 | bit_rate.1 as i64), - object::id::equals(object_id), - vec![ - ffmpeg_data::duration::set( - duration.map(|(a, b)| ffmpeg_data_field_to_db((a as i64) << 32 | b as i64)), - ), - ffmpeg_data::start_time::set( - start_time.map(|(a, b)| ffmpeg_data_field_to_db((a as i64) << 32 | b as i64)), - ), - ffmpeg_data::metadata::set( - serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegData metadata: {err:#?}"); - err - }) - .ok(), - ), - ], - ) - .select(ffmpeg_data::select!({ id })) - .exec() - .await - .map(|data| data.id) -} - -async fn 
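A round-trip sketch (std only) of the bit packing used just above for the ffmpeg bit_rate, duration and start_time fields: each (i32, u32) pair is folded into a single i64 before being handed to ffmpeg_data_field_to_db, and the read path splits it back with a 32-bit shift.

fn pack(high: i32, low: u32) -> i64 {
    ((high as i64) << 32) | (low as i64)
}

fn unpack(packed: i64) -> (i32, u32) {
    ((packed >> 32) as i32, packed as u32)
}

fn main() {
    // e.g. a duration of 3 whole units plus a 500_000 fractional part
    let packed = pack(3, 500_000);
    assert_eq!(packed, 12_885_401_888);
    assert_eq!(unpack(packed), (3, 500_000));
}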
create_ffmpeg_chapters( - ffmpeg_data_id: ffmpeg_data::id::Type, - chapters: Vec, - db: &PrismaClient, -) -> Result<(), QueryError> { - db.ffmpeg_media_chapter() - .create_many( - chapters - .into_iter() - .map( - |Chapter { - id: chapter_id, - start: (start_high, start_low), - end: (end_high, end_low), - time_base_den, - time_base_num, - metadata, - }| ffmpeg_media_chapter::CreateUnchecked { - chapter_id, - start: ffmpeg_data_field_to_db( - (start_high as i64) << 32 | start_low as i64, - ), - end: ffmpeg_data_field_to_db((end_high as i64) << 32 | end_low as i64), - time_base_den, - time_base_num, - ffmpeg_data_id, - _params: vec![ffmpeg_media_chapter::metadata::set( - serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegMediaChapter metadata: {err:#?}"); - err - }) - .ok(), - )], - }, - ) - .collect(), - ) - .exec() - .await - .map(|_| ()) -} - -async fn create_ffmpeg_programs( - data_id: i32, - programs: Vec, - db: &PrismaClient, -) -> Result)>, QueryError> { - let (creates, streams_by_program_id) = - programs - .into_iter() - .map( - |Program { - id: program_id, - name, - metadata, - streams, - }| { - ( - ffmpeg_media_program::CreateUnchecked { - program_id, - ffmpeg_data_id: data_id, - _params: vec![ - ffmpeg_media_program::name::set(name.clone()), - ffmpeg_media_program::metadata::set( - serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegMediaProgram metadata: {err:#?}"); - err - }) - .ok(), - ), - ], - }, - (program_id, streams), - ) - }, - ) - .unzip::<_, _, Vec<_>, Vec<_>>(); - - db.ffmpeg_media_program() - .create_many(creates) - .exec() - .await - .map(|_| streams_by_program_id) -} - -async fn create_ffmpeg_streams( - ffmpeg_data_id: ffmpeg_data::id::Type, - streams: Vec<(ffmpeg_media_program::program_id::Type, Vec)>, - db: &PrismaClient, -) -> Result< - Vec<( - ffmpeg_media_program::program_id::Type, - ffmpeg_media_stream::stream_id::Type, - Codec, - )>, - QueryError, -> { - let (creates, maybe_codecs) = streams - .into_iter() - .flat_map(|(program_id, streams)| { - streams.into_iter().map( - move |Stream { - id: stream_id, - name, - codec: maybe_codec, - aspect_ratio_num, - aspect_ratio_den, - frames_per_second_num, - frames_per_second_den, - time_base_real_den, - time_base_real_num, - dispositions, - metadata, - }| { - ( - ffmpeg_media_stream::CreateUnchecked { - stream_id, - aspect_ratio_num, - aspect_ratio_den, - frames_per_second_num, - frames_per_second_den, - time_base_real_den, - time_base_real_num, - program_id, - ffmpeg_data_id, - _params: vec![ - ffmpeg_media_stream::name::set(name), - ffmpeg_media_stream::dispositions::set( - (!dispositions.is_empty()).then_some(dispositions.join(",")), - ), - ffmpeg_media_stream::title::set(metadata.title.clone()), - ffmpeg_media_stream::encoder::set(metadata.encoder.clone()), - ffmpeg_media_stream::language::set(metadata.language.clone()), - ffmpeg_media_stream::metadata::set( - serde_json::to_vec(&metadata) - .map_err(|err| { - error!("Error reading FFmpegMediaStream metadata: {err:#?}"); - err - }) - .ok(), - ), - ], - }, - maybe_codec.map(|codec| (program_id, stream_id, codec)), - ) - }, - ) - }) - .unzip::<_, _, Vec<_>, Vec<_>>(); - - db.ffmpeg_media_stream() - .create_many(creates) - .exec() - .await - .map(|_| maybe_codecs.into_iter().flatten().collect()) -} - -async fn create_ffmpeg_codecs( - ffmpeg_data_id: ffmpeg_data::id::Type, - codecs: Vec<( - ffmpeg_media_program::program_id::Type, - ffmpeg_media_stream::stream_id::Type, - Codec, - )>, - db: &PrismaClient, -) -> 
Result< - ( - Vec<(ffmpeg_media_codec::id::Type, AudioProps)>, - Vec<(ffmpeg_media_codec::id::Type, VideoProps)>, - ), - QueryError, -> { - let expected_creates = codecs.len(); - - let (creates, mut audio_props, mut video_props) = codecs.into_iter().enumerate().fold( - ( - Vec::with_capacity(expected_creates), - HashMap::with_capacity(expected_creates), - HashMap::with_capacity(expected_creates), - ), - |(mut creates, mut audio_props, mut video_props), - ( - idx, - ( - program_id, - stream_id, - Codec { - kind, - sub_kind, - tag, - name, - profile, - bit_rate, - props: maybe_props, - }, - ), - )| { - creates.push(ffmpeg_media_codec::CreateUnchecked { - bit_rate, - stream_id, - program_id, - ffmpeg_data_id, - _params: vec![ - ffmpeg_media_codec::kind::set(kind), - ffmpeg_media_codec::sub_kind::set(sub_kind), - ffmpeg_media_codec::tag::set(tag), - ffmpeg_media_codec::name::set(name), - ffmpeg_media_codec::profile::set(profile), - ], - }); - - if let Some(props) = maybe_props { - match props { - Props::Audio(props) => { - audio_props.insert(idx, props); - } - Props::Video(props) => { - video_props.insert(idx, props); - } - Props::Subtitle(_) => { - // We don't care about subtitles props for now :D - } - } - } - - (creates, audio_props, video_props) - }, - ); - - let created_ids = creates - .into_iter() - .map( - |ffmpeg_media_codec::CreateUnchecked { - bit_rate, - stream_id, - program_id, - ffmpeg_data_id, - _params, - }| { - db.ffmpeg_media_codec() - .create_unchecked(bit_rate, stream_id, program_id, ffmpeg_data_id, _params) - .select(ffmpeg_media_codec::select!({ id })) - .exec() - }, - ) - .collect::>() - .try_join() - .await?; - - assert_eq!( - created_ids.len(), - expected_creates, - "Not all codecs were created and our invariant is broken!" - ); - - debug_assert!( - created_ids - .windows(2) - .all(|window| window[0].id < window[1].id), - "Codecs were created in a different order than we expected, our invariant is broken!" 
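A simplified sketch of the index pairing that create_ffmpeg_codecs relies on, with plain strings standing in for AudioProps/VideoProps: props are parked in maps keyed by the codec's position in the create list, and because the rows are created one by one in that same order, the ids coming back can be matched to their props by position.

use std::collections::HashMap;

fn main() {
    // position in the create list -> per-codec payload (stand-in for AudioProps)
    let mut props_by_position: HashMap<usize, &str> = HashMap::new();
    props_by_position.insert(0, "audio props for first codec");
    props_by_position.insert(2, "audio props for third codec");

    // ids returned by the database, one per created codec, in creation order
    let created_ids = [10, 11, 12];

    let paired: Vec<(i32, &str)> = created_ids
        .iter()
        .enumerate()
        .filter_map(|(idx, id)| props_by_position.remove(&idx).map(|props| (*id, props)))
        .collect();

    assert_eq!(
        paired,
        vec![(10, "audio props for first codec"), (12, "audio props for third codec")]
    );
}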
- ); - - Ok(created_ids.into_iter().enumerate().fold( - ( - Vec::with_capacity(audio_props.len()), - Vec::with_capacity(video_props.len()), - ), - |(mut a_props, mut v_props), (idx, codec_data)| { - if let Some(audio_props) = audio_props.remove(&idx) { - a_props.push((codec_data.id, audio_props)); - } else if let Some(video_props) = video_props.remove(&idx) { - v_props.push((codec_data.id, video_props)); - } - - (a_props, v_props) - }, - )) -} - -async fn create_ffmpeg_audio_props( - audio_props: Vec<(ffmpeg_media_codec::id::Type, AudioProps)>, - db: &PrismaClient, -) -> Result<(), QueryError> { - db.ffmpeg_media_audio_props() - .create_many( - audio_props - .into_iter() - .map( - |( - codec_id, - AudioProps { - delay, - padding, - sample_rate, - sample_format, - bit_per_sample, - channel_layout, - }, - )| ffmpeg_media_audio_props::CreateUnchecked { - delay, - padding, - codec_id, - _params: vec![ - ffmpeg_media_audio_props::sample_rate::set(sample_rate), - ffmpeg_media_audio_props::sample_format::set(sample_format), - ffmpeg_media_audio_props::bit_per_sample::set(bit_per_sample), - ffmpeg_media_audio_props::channel_layout::set(channel_layout), - ], - }, - ) - .collect(), - ) - .exec() - .await - .map(|_| ()) -} - -async fn create_ffmpeg_video_props( - video_props: Vec<(ffmpeg_media_codec::id::Type, VideoProps)>, - db: &PrismaClient, -) -> Result<(), QueryError> { - db.ffmpeg_media_video_props() - .create_many( - video_props - .into_iter() - .map( - |( - codec_id, - VideoProps { - pixel_format, - color_range, - bits_per_channel, - color_space, - color_primaries, - color_transfer, - field_order, - chroma_location, - width, - height, - aspect_ratio_num, - aspect_ratio_den, - properties, - }, - )| { - ffmpeg_media_video_props::CreateUnchecked { - width, - height, - codec_id, - _params: vec![ - ffmpeg_media_video_props::pixel_format::set(pixel_format), - ffmpeg_media_video_props::color_range::set(color_range), - ffmpeg_media_video_props::bits_per_channel::set(bits_per_channel), - ffmpeg_media_video_props::color_space::set(color_space), - ffmpeg_media_video_props::color_primaries::set(color_primaries), - ffmpeg_media_video_props::color_transfer::set(color_transfer), - ffmpeg_media_video_props::field_order::set(field_order), - ffmpeg_media_video_props::chroma_location::set(chroma_location), - ffmpeg_media_video_props::aspect_ratio_num::set(aspect_ratio_num), - ffmpeg_media_video_props::aspect_ratio_den::set(aspect_ratio_den), - ffmpeg_media_video_props::properties::set(Some( - properties.join(","), - )), - ], - } - }, - ) - .collect(), - ) - .exec() - .await - .map(|_| ()) -} diff --git a/core/src/object/media/mod.rs b/core/src/object/media/mod.rs deleted file mode 100644 index 36ade439aa36..000000000000 --- a/core/src/object/media/mod.rs +++ /dev/null @@ -1,294 +0,0 @@ -// use sd_core_prisma_helpers::object_with_media_data; -// use sd_media_metadata::{ -// ffmpeg::{ -// audio_props::AudioProps, -// chapter::Chapter, -// codec::{Codec, Props}, -// program::Program, -// stream::Stream, -// video_props::VideoProps, -// }, -// ExifMetadata, FFmpegMetadata, -// }; -// use sd_prisma::prisma::{ -// exif_data::*, ffmpeg_media_audio_props, ffmpeg_media_chapter, ffmpeg_media_video_props, -// }; - -// pub mod exif_metadata_extractor; -// pub mod ffmpeg_metadata_extractor; -// pub mod old_media_processor; -// pub mod old_thumbnail; - -use crate::{library::LibraryId, Node}; - -use sd_core_heavy_lifting::media_processor::ThumbnailKind; - -use std::path::PathBuf; - -/// This does not check if a thumbnail 
exists, it just returns the path that it would exist at -pub fn get_indexed_thumbnail_path(node: &Node, cas_id: &str, library_id: LibraryId) -> PathBuf { - ThumbnailKind::Indexed(library_id).compute_path(node.config.data_directory(), cas_id) -} - -/// This does not check if a thumbnail exists, it just returns the path that it would exist at -pub fn get_ephemeral_thumbnail_path(node: &Node, cas_id: &str) -> PathBuf { - ThumbnailKind::Ephemeral.compute_path(node.config.data_directory(), cas_id) -} - -// pub use old_media_processor::OldMediaProcessorJobInit; -// use sd_utils::db::ffmpeg_data_field_from_db; - -// pub fn exif_data_image_to_query(mdi: ExifMetadata, object_id: object_id::Type) -> CreateUnchecked { -// CreateUnchecked { -// object_id, -// _params: vec![ -// camera_data::set(serde_json::to_vec(&mdi.camera_data).ok()), -// media_date::set(serde_json::to_vec(&mdi.date_taken).ok()), -// resolution::set(serde_json::to_vec(&mdi.resolution).ok()), -// media_location::set(serde_json::to_vec(&mdi.location).ok()), -// artist::set(mdi.artist), -// description::set(mdi.description), -// copyright::set(mdi.copyright), -// exif_version::set(mdi.exif_version), -// epoch_time::set(mdi.date_taken.map(|x| x.unix_timestamp())), -// ], -// } -// } - -// pub fn exif_data_image_to_query_params( -// mdi: ExifMetadata, -// ) -> (Vec<(&'static str, rmpv::Value)>, Vec) { -// use sd_sync::option_sync_db_entry; -// use sd_utils::chain_optional_iter; - -// chain_optional_iter( -// [], -// [ -// option_sync_db_entry!(serde_json::to_vec(&mdi.camera_data).ok(), camera_data), -// option_sync_db_entry!(serde_json::to_vec(&mdi.date_taken).ok(), media_date), -// option_sync_db_entry!(serde_json::to_vec(&mdi.location).ok(), media_location), -// option_sync_db_entry!(mdi.artist, artist), -// option_sync_db_entry!(mdi.description, description), -// option_sync_db_entry!(mdi.copyright, copyright), -// option_sync_db_entry!(mdi.exif_version, exif_version), -// ], -// ) -// .into_iter() -// .unzip() -// } - -// pub fn exif_media_data_from_prisma_data(data: sd_prisma::prisma::exif_data::Data) -> ExifMetadata { -// ExifMetadata { -// camera_data: from_slice_option_to_option(data.camera_data).unwrap_or_default(), -// date_taken: from_slice_option_to_option(data.media_date).unwrap_or_default(), -// resolution: from_slice_option_to_option(data.resolution).unwrap_or_default(), -// location: from_slice_option_to_option(data.media_location), -// artist: data.artist, -// description: data.description, -// copyright: data.copyright, -// exif_version: data.exif_version, -// } -// } - -// pub fn ffmpeg_data_from_prisma_data( -// object_with_media_data::ffmpeg_data::Data { -// formats, -// duration, -// start_time, -// bit_rate, -// metadata, -// chapters, -// programs, -// .. 
-// }: object_with_media_data::ffmpeg_data::Data, -// ) -> FFmpegMetadata { -// FFmpegMetadata { -// formats: formats.split(',').map(String::from).collect::>(), -// duration: duration.map(|duration| { -// let duration = ffmpeg_data_field_from_db(&duration); -// ((duration >> 32) as i32, duration as u32) -// }), -// start_time: start_time.map(|start_time| { -// let start_time = ffmpeg_data_field_from_db(&start_time); -// ((start_time >> 32) as i32, start_time as u32) -// }), -// bit_rate: { -// let bit_rate = ffmpeg_data_field_from_db(&bit_rate); -// ((bit_rate >> 32) as i32, bit_rate as u32) -// }, -// chapters: chapters -// .into_iter() -// .map( -// |ffmpeg_media_chapter::Data { -// chapter_id, -// start, -// end, -// time_base_den, -// time_base_num, -// metadata, -// .. -// }| Chapter { -// id: chapter_id, -// start: { -// let start = ffmpeg_data_field_from_db(&start); -// ((start >> 32) as i32, start as u32) -// }, -// end: { -// let end = ffmpeg_data_field_from_db(&end); -// ((end >> 32) as i32, end as u32) -// }, -// time_base_den, -// time_base_num, -// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), -// }, -// ) -// .collect(), -// programs: programs -// .into_iter() -// .map( -// |object_with_media_data::ffmpeg_data::programs::Data { -// program_id, -// name, -// metadata, -// streams, -// .. -// }| Program { -// id: program_id, -// name, -// streams: streams -// .into_iter() -// .map( -// |object_with_media_data::ffmpeg_data::programs::streams::Data { -// stream_id, -// name, -// aspect_ratio_num, -// aspect_ratio_den, -// frames_per_second_num, -// frames_per_second_den, -// time_base_real_den, -// time_base_real_num, -// dispositions, -// metadata, -// codec, -// .. -// }| { -// Stream { -// id: stream_id, -// name, -// codec: codec.map( -// |object_with_media_data::ffmpeg_data::programs::streams::codec::Data{ -// kind, -// sub_kind, -// tag, -// name, -// profile, -// bit_rate, -// audio_props, -// video_props, -// .. -// }| Codec { -// kind, -// sub_kind, -// tag, -// name, -// profile, -// bit_rate, -// props: match (audio_props, video_props) { -// ( -// Some(ffmpeg_media_audio_props::Data { -// delay, -// padding, -// sample_rate, -// sample_format, -// bit_per_sample, -// channel_layout, -// .. -// }), -// None, -// ) => Some(Props::Audio(AudioProps { -// delay, -// padding, -// sample_rate, -// sample_format, -// bit_per_sample, -// channel_layout, -// })), -// ( -// None, -// Some(ffmpeg_media_video_props::Data { -// pixel_format, -// color_range, -// bits_per_channel, -// color_space, -// color_primaries, -// color_transfer, -// field_order, -// chroma_location, -// width, -// height, -// aspect_ratio_num, -// aspect_ratio_den, -// properties, -// .. 
-// }), -// ) => Some(Props::Video(VideoProps { -// pixel_format, -// color_range, -// bits_per_channel, -// color_space, -// color_primaries, -// color_transfer, -// field_order, -// chroma_location, -// width, -// height, -// aspect_ratio_num, -// aspect_ratio_den, -// properties: properties -// .map(|dispositions| { -// dispositions -// .split(',') -// .map(String::from) -// .collect::>() -// }) -// .unwrap_or_default(), -// })), -// _ => None, -// }, -// } -// ), -// aspect_ratio_num, -// aspect_ratio_den, -// frames_per_second_num, -// frames_per_second_den, -// time_base_real_den, -// time_base_real_num, -// dispositions: dispositions -// .map(|dispositions| { -// dispositions -// .split(',') -// .map(String::from) -// .collect::>() -// }) -// .unwrap_or_default(), -// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), -// } -// }, -// ) -// .collect(), -// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), -// }, -// ) -// .collect(), -// metadata: from_slice_option_to_option(metadata).unwrap_or_default(), -// } -// } - -// #[must_use] -// fn from_slice_option_to_option( -// value: Option>, -// ) -> Option { -// value -// .map(|x| serde_json::from_slice(&x).ok()) -// .unwrap_or_default() -// } diff --git a/core/src/object/media/old_media_processor/job.rs b/core/src/object/media/old_media_processor/job.rs deleted file mode 100644 index 938c21658eac..000000000000 --- a/core/src/object/media/old_media_processor/job.rs +++ /dev/null @@ -1,679 +0,0 @@ -use crate::{ - invalidate_query, - library::Library, - location::ScanState, - object::media::ffmpeg_metadata_extractor, - old_job::{ - CurrentStep, JobError, JobInitOutput, JobReportUpdate, JobResult, JobStepOutput, - StatefulJob, WorkerContext, - }, - Node, -}; - -#[cfg(feature = "ai")] -use crate::old_job::JobRunErrors; - -use sd_core_file_path_helper::{ - ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - IsolatedFilePathData, -}; -use sd_core_prisma_helpers::file_path_for_media_processor; - -use sd_file_ext::extensions::Extension; -use sd_prisma::prisma::{location, PrismaClient}; -use sd_utils::db::maybe_missing; - -#[cfg(feature = "ai")] -use sd_ai::old_image_labeler::{BatchToken as ImageLabelerBatchToken, LabelerOutput}; - -#[cfg(feature = "ai")] -use std::sync::Arc; - -use std::{ - hash::Hash, - path::{Path, PathBuf}, - pin::pin, - time::Duration, -}; - -use async_channel as chan; -use futures::StreamExt; -use itertools::Itertools; -use prisma_client_rust::{raw, PrismaValue}; -use serde::{Deserialize, Serialize}; -use serde_json::json; -use tokio::time::sleep; -use tracing::{debug, error, info, trace, warn}; - -use super::{ - exif_metadata_extractor, - old_thumbnail::{self, GenerateThumbnailArgs}, - process_audio_and_video, process_images, BatchToProcess, MediaProcessorError, - OldMediaProcessorMetadata, -}; - -const BATCH_SIZE: usize = 10; - -#[derive(Serialize, Deserialize, Debug)] -pub struct OldMediaProcessorJobInit { - pub location: location::Data, - pub sub_path: Option, - pub regenerate_thumbnails: bool, - pub regenerate_labels: bool, -} - -impl Hash for OldMediaProcessorJobInit { - fn hash(&self, state: &mut H) { - self.location.id.hash(state); - if let Some(ref sub_path) = self.sub_path { - sub_path.hash(state); - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct OldMediaProcessorJobData { - location_path: PathBuf, - to_process_path: PathBuf, - #[serde(skip, default)] - maybe_thumbnailer_progress_rx: Option>, - #[cfg(feature = "ai")] - 
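An equivalent, self-contained version of the from_slice_option_to_option helper shown (commented out) above; it assumes the serde and serde_json crates already used by the crate. An optional JSON blob column is deserialized when present, and a parse failure is flattened into None rather than surfaced as an error.

fn from_slice_option_to_option<T: serde::de::DeserializeOwned>(
    value: Option<Vec<u8>>,
) -> Option<T> {
    value.and_then(|bytes| serde_json::from_slice(&bytes).ok())
}

fn main() {
    let present: Option<Vec<u8>> = Some(b"[1,2,3]".to_vec());
    let parsed: Option<Vec<u32>> = from_slice_option_to_option(present);
    assert_eq!(parsed, Some(vec![1, 2, 3]));

    // Malformed JSON is treated the same as a missing value.
    let broken: Option<Vec<u8>> = Some(b"not json".to_vec());
    assert_eq!(from_slice_option_to_option::<Vec<u32>>(broken), None);
}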
labeler_batch_token: ImageLabelerBatchToken, - #[cfg(feature = "ai")] - #[serde(skip, default)] - maybe_labels_rx: Option>, -} - -#[derive(Debug, Serialize, Deserialize)] -pub enum OldMediaProcessorJobStep { - ExtractImageMediaData(Vec), - ExtractAudioAndVideoMediaData(Vec), - WaitThumbnails(usize), - #[cfg(feature = "ai")] - WaitLabels(usize), -} - -#[async_trait::async_trait] -impl StatefulJob for OldMediaProcessorJobInit { - type Data = OldMediaProcessorJobData; - type Step = OldMediaProcessorJobStep; - type RunMetadata = OldMediaProcessorMetadata; - - const NAME: &'static str = "media_processor"; - const IS_BATCHED: bool = true; - - fn target_location(&self) -> location::id::Type { - self.location.id - } - - async fn init( - &self, - ctx: &WorkerContext, - data: &mut Option, - ) -> Result, JobError> { - let Library { - db, - #[cfg(feature = "ai")] - sync, - .. - } = ctx.library.as_ref(); - - let location_id = self.location.id; - let location_path = - maybe_missing(&self.location.path, "location.path").map(PathBuf::from)?; - - let (to_process_path, iso_file_path) = match &self.sub_path { - Some(sub_path) if sub_path != Path::new("") => { - let full_path = ensure_sub_path_is_in_location(&location_path, sub_path) - .await - .map_err(MediaProcessorError::from)?; - ensure_sub_path_is_directory(&location_path, sub_path) - .await - .map_err(MediaProcessorError::from)?; - - let sub_iso_file_path = - IsolatedFilePathData::new(location_id, &location_path, &full_path, true) - .map_err(MediaProcessorError::from)?; - - ensure_file_path_exists( - sub_path, - &sub_iso_file_path, - db, - MediaProcessorError::SubPathNotFound, - ) - .await?; - - (full_path, sub_iso_file_path) - } - _ => ( - location_path.to_path_buf(), - IsolatedFilePathData::new(location_id, &location_path, &location_path, true) - .map_err(MediaProcessorError::from)?, - ), - }; - - debug!( - "Searching for media files in location {location_id} at directory \"{iso_file_path}\"" - ); - - let thumbs_to_process_count = dispatch_thumbnails_for_processing( - location_id, - &location_path, - &iso_file_path, - &ctx.library, - &ctx.node, - self.regenerate_thumbnails, - ) - .await?; - - let maybe_thumbnailer_progress_rx = if thumbs_to_process_count > 0 { - let (progress_tx, progress_rx) = chan::unbounded(); - - ctx.node - .thumbnailer - .register_reporter(location_id, progress_tx) - .await; - - Some(progress_rx) - } else { - None - }; - - let file_paths_to_extract_exif_data = - get_files_for_image_media_data_extraction(db, &iso_file_path).await?; - let file_paths_to_extract_ffmpeg_data = - get_files_for_audio_and_video_media_data_extraction(db, &iso_file_path).await?; - - #[cfg(feature = "ai")] - let file_paths_for_labeling = - get_files_for_labeling(db, &iso_file_path, self.regenerate_labels).await?; - - #[cfg(feature = "ai")] - let total_files_for_labeling = file_paths_for_labeling.len(); - - #[cfg(feature = "ai")] - let (labeler_batch_token, labels_rx) = - if let Some(image_labeller) = ctx.node.old_image_labeller.as_ref() { - let (labeler_batch_token, labels_rx) = image_labeller - .new_resumable_batch( - location_id, - location_path.clone(), - file_paths_for_labeling, - Arc::clone(db), - sync.clone(), - ) - .await; - (labeler_batch_token, Some(labels_rx)) - } else { - (uuid::Uuid::new_v4(), None) - }; - - let total_files = - file_paths_to_extract_exif_data.len() + file_paths_to_extract_ffmpeg_data.len(); - - let chunked_files = file_paths_to_extract_exif_data - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .map(|chunk| 
chunk.collect::>()) - .map(OldMediaProcessorJobStep::ExtractImageMediaData) - .chain( - file_paths_to_extract_ffmpeg_data - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .map(|chunk| chunk.collect::>()) - .map(OldMediaProcessorJobStep::ExtractAudioAndVideoMediaData), - ) - .chain( - [(thumbs_to_process_count > 0).then_some( - OldMediaProcessorJobStep::WaitThumbnails(thumbs_to_process_count as usize), - )] - .into_iter() - .flatten(), - ) - .chain( - [ - #[cfg(feature = "ai")] - { - (total_files_for_labeling > 0).then_some( - OldMediaProcessorJobStep::WaitLabels(total_files_for_labeling), - ) - }, - #[cfg(not(feature = "ai"))] - { - None - }, - ] - .into_iter() - .flatten(), - ) - .collect::>(); - - ctx.progress(vec![ - JobReportUpdate::TaskCount(total_files), - JobReportUpdate::Phase("media_data".to_string()), - JobReportUpdate::Message(format!( - "Preparing to process {total_files} files in {} chunks", - chunked_files.len() - )), - ]); - - *data = Some(OldMediaProcessorJobData { - location_path, - to_process_path, - maybe_thumbnailer_progress_rx, - #[cfg(feature = "ai")] - labeler_batch_token, - #[cfg(feature = "ai")] - maybe_labels_rx: labels_rx, - }); - - Ok(( - Self::RunMetadata { - thumbs_processed: thumbs_to_process_count, - ..Default::default() - }, - chunked_files, - ) - .into()) - } - - async fn execute_step( - &self, - ctx: &WorkerContext, - CurrentStep { step, step_number }: CurrentStep<'_, Self::Step>, - data: &Self::Data, - _: &Self::RunMetadata, - ) -> Result, JobError> { - match step { - OldMediaProcessorJobStep::ExtractImageMediaData(file_paths) => process_images( - file_paths, - self.location.id, - &data.location_path, - &ctx.library.db, - &|completed_count| { - ctx.progress(vec![JobReportUpdate::CompletedTaskCount( - step_number * BATCH_SIZE + completed_count, - )]); - }, - ) - .await - .map(Into::into) - .map_err(Into::into), - - OldMediaProcessorJobStep::ExtractAudioAndVideoMediaData(file_paths) => { - process_audio_and_video( - file_paths, - self.location.id, - &data.location_path, - &ctx.library.db, - &|completed_count| { - ctx.progress(vec![JobReportUpdate::CompletedTaskCount( - step_number * BATCH_SIZE + completed_count, - )]); - }, - ) - .await - .map(Into::into) - .map_err(Into::into) - } - - OldMediaProcessorJobStep::WaitThumbnails(total_thumbs) => { - ctx.progress(vec![ - JobReportUpdate::TaskCount(*total_thumbs), - JobReportUpdate::Phase("thumbnails".to_string()), - JobReportUpdate::Message(format!( - "Waiting for processing of {total_thumbs} thumbnails", - )), - ]); - - let mut progress_rx = pin!(if let Some(progress_rx) = - data.maybe_thumbnailer_progress_rx.clone() - { - progress_rx - } else { - let (progress_tx, progress_rx) = chan::unbounded(); - - ctx.node - .thumbnailer - .register_reporter(self.location.id, progress_tx) - .await; - - progress_rx - }); - - let mut total_completed = 0; - - while let Some((completed, total)) = progress_rx.next().await { - trace!("Received progress update from thumbnailer: {completed}/{total}",); - ctx.progress(vec![JobReportUpdate::CompletedTaskCount( - completed as usize, - )]); - total_completed = completed; - } - - if progress_rx.is_closed() && total_completed < *total_thumbs as u32 { - warn!( - "Thumbnailer progress reporter channel closed before all thumbnails were \ - processed, job will wait a bit waiting for a shutdown signal from manager" - ); - sleep(Duration::from_secs(5)).await; - } - - Ok(None.into()) - } - - #[cfg(feature = "ai")] - OldMediaProcessorJobStep::WaitLabels(total_labels) => { - let 
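A self-contained sketch of the batching arithmetic used by this job: files are split into BATCH_SIZE-sized chunks with itertools, and while a chunk is being processed the overall completed count is reported as step_number * BATCH_SIZE + completed_count. It assumes only the itertools crate, which the job already depends on.

use itertools::Itertools;

const BATCH_SIZE: usize = 3; // the real job uses 10

fn overall_completed(step_number: usize, completed_in_step: usize) -> usize {
    // Earlier steps each contributed a full batch, plus whatever finished in this step.
    step_number * BATCH_SIZE + completed_in_step
}

fn main() {
    let file_ids = vec![1, 2, 3, 4, 5, 6, 7];

    // Same shape as the job init: lazily chunk, then collect each chunk so it can
    // become one ExtractImageMediaData / ExtractAudioAndVideoMediaData step.
    let batches: Vec<Vec<i32>> = file_ids
        .into_iter()
        .chunks(BATCH_SIZE)
        .into_iter()
        .map(|chunk| chunk.collect())
        .collect();
    assert_eq!(batches, vec![vec![1, 2, 3], vec![4, 5, 6], vec![7]]);

    // While the second chunk (index 1) reports its 2nd finished file,
    // the job shows 3 * 1 + 2 = 5 files completed overall.
    assert_eq!(overall_completed(1, 2), 5);
}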
Some(image_labeller) = ctx.node.old_image_labeller.as_ref() else { - let err = "AI system is disabled due to a previous error, skipping labels job"; - error!(err); - return Ok(JobRunErrors(vec![err.to_string()]).into()); - }; - - ctx.progress(vec![ - JobReportUpdate::TaskCount(*total_labels), - JobReportUpdate::Phase("labels".to_string()), - JobReportUpdate::Message( - format!("Extracting labels for {total_labels} files",), - ), - ]); - - let mut labels_rx = pin!(if let Some(labels_rx) = data.maybe_labels_rx.clone() { - labels_rx - } else { - match image_labeller - .resume_batch( - data.labeler_batch_token, - Arc::clone(&ctx.library.db), - ctx.library.sync.clone(), - ) - .await - { - Ok(labels_rx) => labels_rx, - Err(e) => return Ok(JobRunErrors(vec![e.to_string()]).into()), - } - }); - - let mut total_labeled = 0; - - let mut errors = Vec::new(); - - while let Some(LabelerOutput { - file_path_id, - has_new_labels, - result, - }) = labels_rx.next().await - { - total_labeled += 1; - ctx.progress(vec![JobReportUpdate::CompletedTaskCount(total_labeled)]); - - if let Err(e) = result { - error!( - "Failed to generate labels : {e:#?}", - file_path_id - ); - - errors.push(e.to_string()); - } else if has_new_labels { - // invalidate_query!(&ctx.library, "labels.count"); // TODO: This query doesn't exist on main yet - } - } - - invalidate_query!(&ctx.library, "labels.list"); - invalidate_query!(&ctx.library, "labels.getForObject"); - invalidate_query!(&ctx.library, "labels.getWithObjects"); - - if !errors.is_empty() { - Ok(JobRunErrors(errors).into()) - } else { - Ok(None.into()) - } - } - } - } - - async fn finalize( - &self, - ctx: &WorkerContext, - data: &Option, - run_metadata: &Self::RunMetadata, - ) -> JobResult { - info!( - "Finished media processing for location {} at {}", - self.location.id, - data.as_ref() - .expect("critical error: missing data on job state") - .to_process_path - .display() - ); - - if run_metadata.exif_data.extracted > 0 || run_metadata.ffmpeg_data.extracted > 0 { - invalidate_query!(ctx.library, "search.paths"); - } - - ctx.library - .db - .location() - .update( - location::id::equals(self.location.id), - vec![location::scan_state::set(ScanState::Completed as i32)], - ) - .exec() - .await - .map_err(MediaProcessorError::from)?; - - Ok(Some(json!({"init: ": self, "run_metadata": run_metadata}))) - } -} - -async fn dispatch_thumbnails_for_processing( - location_id: location::id::Type, - location_path: impl AsRef, - parent_iso_file_path: &IsolatedFilePathData<'_>, - library: &Library, - node: &Node, - should_regenerate: bool, -) -> Result { - let Library { db, .. 
} = library; - - let location_path = location_path.as_ref(); - - let mut file_paths = get_all_children_files_by_extensions( - db, - parent_iso_file_path, - &old_thumbnail::ALL_THUMBNAILABLE_EXTENSIONS, - ) - .await?; - - if file_paths.is_empty() { - return Ok(0); - } - - let first_materialized_path = file_paths[0].materialized_path.clone(); - - // Only the first materialized_path should be processed in foreground - let different_materialized_path_idx = file_paths - .iter() - .position(|file_path| file_path.materialized_path != first_materialized_path); - - let background_thumbs_args = different_materialized_path_idx - .map(|idx| { - file_paths - .split_off(idx) - .into_iter() - .filter_map(|file_path| prepare_args(location_id, location_path, file_path)) - .collect::>() - }) - .unwrap_or_default(); - - let foreground_thumbs_args = file_paths - .into_iter() - .filter_map(|file_path| prepare_args(location_id, location_path, file_path)) - .collect::>(); - - let thumbs_count = background_thumbs_args.len() + foreground_thumbs_args.len(); - - debug!( - "Dispatching {thumbs_count} thumbnails to be processed, {} in foreground and {} in background", - foreground_thumbs_args.len(), - background_thumbs_args.len() - ); - - if !foreground_thumbs_args.is_empty() { - node.thumbnailer - .new_indexed_thumbnails_tracked_batch( - BatchToProcess::new(foreground_thumbs_args, should_regenerate, false), - library.id, - location_id, - ) - .await; - } - - if !background_thumbs_args.is_empty() { - node.thumbnailer - .new_indexed_thumbnails_tracked_batch( - BatchToProcess::new(background_thumbs_args, should_regenerate, true), - library.id, - location_id, - ) - .await; - } - - Ok(thumbs_count as u32) -} - -async fn get_files_for_image_media_data_extraction( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, -) -> Result, MediaProcessorError> { - get_all_children_files_by_extensions( - db, - parent_iso_file_path, - &exif_metadata_extractor::FILTERED_IMAGE_EXTENSIONS, - ) - .await - .map_err(Into::into) -} - -async fn get_files_for_audio_and_video_media_data_extraction( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, -) -> Result, MediaProcessorError> { - get_all_children_files_by_extensions( - db, - parent_iso_file_path, - &ffmpeg_metadata_extractor::FILTERED_AUDIO_AND_VIDEO_EXTENSIONS, - ) - .await - .map_err(Into::into) -} - -#[cfg(feature = "ai")] -async fn get_files_for_labeling( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, - regenerate_labels: bool, -) -> Result, MediaProcessorError> { - // FIXME: Had to use format! 
macro because PCR doesn't support IN with Vec for SQLite - // We have no data coming from the user, so this is sql injection safe - db._query_raw(raw!( - &format!( - "SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id - FROM file_path f - WHERE - location_id={{}} - AND cas_id IS NOT NULL - AND LOWER(extension) IN ({}) - AND materialized_path LIKE {{}} - {} - ORDER BY materialized_path ASC", - // Ordering by materialized_path so we can prioritize processing the first files - // in the above part of the directories tree - &exif_metadata_extractor::FILTERED_IMAGE_EXTENSIONS - .iter() - .map(|ext| format!("LOWER('{ext}')")) - .collect::>() - .join(","), - if !regenerate_labels { - "AND NOT EXISTS (SELECT 1 FROM label_on_object WHERE object_id = f.object_id)" - } else { - "" - } - ), - PrismaValue::Int(parent_iso_file_path.location_id()), - PrismaValue::String(format!( - "{}%", - parent_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory") - )) - )) - .exec() - .await - .map_err(Into::into) -} - -async fn get_all_children_files_by_extensions( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, - extensions: &[Extension], -) -> Result, MediaProcessorError> { - // FIXME: Had to use format! macro because PCR doesn't support IN with Vec for SQLite - // We have no data coming from the user, so this is sql injection safe - db._query_raw(raw!( - &format!( - "SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id - FROM file_path - WHERE - location_id={{}} - AND cas_id IS NOT NULL - AND LOWER(extension) IN ({}) - AND materialized_path LIKE {{}} - ORDER BY materialized_path ASC", - // Ordering by materialized_path so we can prioritize processing the first files - // in the above part of the directories tree - extensions - .iter() - .map(|ext| format!("LOWER('{ext}')")) - .collect::>() - .join(",") - ), - PrismaValue::Int(parent_iso_file_path.location_id()), - PrismaValue::String(format!( - "{}%", - parent_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory") - )) - )) - .exec() - .await - .map_err(Into::into) -} - -fn prepare_args( - location_id: location::id::Type, - location_path: &Path, // This function is only used internally once, so we can pass &Path as a parameter - file_path: file_path_for_media_processor::Data, -) -> Option { - let file_path_id = file_path.id; - - let Ok(cas_id) = maybe_missing(&file_path.cas_id, "file_path.cas_id").cloned() else { - error!("Missing cas_id for file_path "); - return None; - }; - - let Ok(iso_file_path) = IsolatedFilePathData::try_from((location_id, file_path)).map_err(|e| { - error!("Failed to extract isolated file path data from file path : {e:#?}"); - }) else { - return None; - }; - - Some(GenerateThumbnailArgs::new( - iso_file_path.extension().to_string(), - cas_id, - location_path.join(&iso_file_path), - )) -} diff --git a/core/src/object/media/old_media_processor/mod.rs b/core/src/object/media/old_media_processor/mod.rs deleted file mode 100644 index b89010ec7176..000000000000 --- a/core/src/object/media/old_media_processor/mod.rs +++ /dev/null @@ -1,109 +0,0 @@ -use crate::old_job::{JobRunErrors, JobRunMetadata}; - -use sd_core_file_path_helper::FilePathError; -use sd_core_prisma_helpers::file_path_for_media_processor; - -use sd_prisma::prisma::{location, PrismaClient}; - -use std::path::Path; - -use serde::{Deserialize, Serialize}; -use thiserror::Error; -use tracing::error; - -use super::{ - 
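A small sketch of how the IN (...) fragment for the raw queries above is assembled; as the FIXME notes, splicing with format! is only acceptable here because the values come from a static extension allow-list, never from user input (plain &str stands in for Extension).

fn in_clause(extensions: &[&str]) -> String {
    extensions
        .iter()
        .map(|ext| format!("LOWER('{ext}')"))
        .collect::<Vec<_>>()
        .join(",")
}

fn main() {
    let clause = in_clause(&["jpg", "png", "webp"]);
    assert_eq!(clause, "LOWER('jpg'),LOWER('png'),LOWER('webp')");
    // The fragment is then interpolated into the SELECT, e.g.
    // format!("... AND LOWER(extension) IN ({clause}) ...")
}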
exif_metadata_extractor::{self, ExifDataError, OldExifDataExtractorMetadata}, - ffmpeg_metadata_extractor::{self, FFmpegDataError, OldFFmpegDataExtractorMetadata}, - old_thumbnail::{self, BatchToProcess, ThumbnailerError}, -}; - -mod job; -mod shallow; - -pub use job::OldMediaProcessorJobInit; -pub use shallow::old_shallow; - -#[derive(Error, Debug)] -pub enum MediaProcessorError { - #[error("sub path not found: ", .0.display())] - SubPathNotFound(Box), - - #[error("database error: {0}")] - Database(#[from] prisma_client_rust::QueryError), - #[error(transparent)] - FilePath(#[from] FilePathError), - - #[error(transparent)] - Thumbnailer(#[from] ThumbnailerError), - #[error(transparent)] - ExifMediaDataExtractor(#[from] ExifDataError), - #[error(transparent)] - FFmpegDataExtractor(#[from] FFmpegDataError), -} - -#[derive(Debug, Serialize, Deserialize, Default)] -pub struct OldMediaProcessorMetadata { - exif_data: OldExifDataExtractorMetadata, - ffmpeg_data: OldFFmpegDataExtractorMetadata, - thumbs_processed: u32, - labels_extracted: u32, -} - -impl From for OldMediaProcessorMetadata { - fn from(exif_data: OldExifDataExtractorMetadata) -> Self { - Self { - exif_data, - ffmpeg_data: Default::default(), - thumbs_processed: 0, - labels_extracted: 0, - } - } -} - -impl From for OldMediaProcessorMetadata { - fn from(ffmpeg_data: OldFFmpegDataExtractorMetadata) -> Self { - Self { - exif_data: Default::default(), - ffmpeg_data, - thumbs_processed: 0, - labels_extracted: 0, - } - } -} - -impl JobRunMetadata for OldMediaProcessorMetadata { - fn update(&mut self, new_data: Self) { - self.exif_data.extracted += new_data.exif_data.extracted; - self.exif_data.skipped += new_data.exif_data.skipped; - self.ffmpeg_data.extracted += new_data.ffmpeg_data.extracted; - self.ffmpeg_data.skipped += new_data.ffmpeg_data.skipped; - self.thumbs_processed += new_data.thumbs_processed; - self.labels_extracted += new_data.labels_extracted; - } -} - -pub async fn process_images( - files_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: impl AsRef + Send, - db: &PrismaClient, - ctx_update_fn: &impl Fn(usize), -) -> Result<(OldMediaProcessorMetadata, JobRunErrors), MediaProcessorError> { - exif_metadata_extractor::process(files_paths, location_id, location_path, db, ctx_update_fn) - .await - .map(|(exif_extraction_metadata, errors)| (exif_extraction_metadata.into(), errors)) - .map_err(Into::into) -} - -pub async fn process_audio_and_video( - files_paths: &[file_path_for_media_processor::Data], - location_id: location::id::Type, - location_path: impl AsRef + Send, - db: &PrismaClient, - ctx_update_fn: &impl Fn(usize), -) -> Result<(OldMediaProcessorMetadata, JobRunErrors), MediaProcessorError> { - ffmpeg_metadata_extractor::process(files_paths, location_id, location_path, db, ctx_update_fn) - .await - .map(|(ffmpeg_extraction_metadata, errors)| (ffmpeg_extraction_metadata.into(), errors)) - .map_err(Into::into) -} diff --git a/core/src/object/media/old_media_processor/shallow.rs b/core/src/object/media/old_media_processor/shallow.rs deleted file mode 100644 index 12197ebb2970..000000000000 --- a/core/src/object/media/old_media_processor/shallow.rs +++ /dev/null @@ -1,367 +0,0 @@ -use crate::{ - invalidate_query, - library::Library, - old_job::{JobError, JobRunMetadata}, - Node, -}; - -use sd_core_file_path_helper::{ - ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - IsolatedFilePathData, -}; -use 
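A trimmed-down illustration of the JobRunMetadata::update contract implemented above: each processed chunk returns its own counters and the job-wide metadata simply adds them field by field.

#[derive(Default, Debug, PartialEq)]
struct Counters {
    extracted: u32,
    skipped: u32,
}

impl Counters {
    // Mirrors the field-wise accumulation in OldMediaProcessorMetadata::update.
    fn update(&mut self, new_data: Self) {
        self.extracted += new_data.extracted;
        self.skipped += new_data.skipped;
    }
}

fn main() {
    let mut total = Counters::default();
    total.update(Counters { extracted: 5, skipped: 1 }); // first chunk
    total.update(Counters { extracted: 2, skipped: 3 }); // second chunk
    assert_eq!(total, Counters { extracted: 7, skipped: 4 });
}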
sd_core_prisma_helpers::file_path_for_media_processor; - -use sd_file_ext::extensions::Extension; -use sd_prisma::prisma::{location, PrismaClient}; -use sd_utils::db::maybe_missing; - -#[cfg(feature = "ai")] -use sd_ai::old_image_labeler::LabelerOutput; - -use std::path::{Path, PathBuf}; - -#[cfg(feature = "ai")] -use std::sync::Arc; - -use itertools::Itertools; -use prisma_client_rust::{raw, PrismaValue}; -use tracing::{debug, error}; - -#[cfg(feature = "ai")] -use futures::StreamExt; - -use super::{ - exif_metadata_extractor, ffmpeg_metadata_extractor, - old_thumbnail::{self, BatchToProcess, GenerateThumbnailArgs}, - MediaProcessorError, OldMediaProcessorMetadata, -}; - -const BATCH_SIZE: usize = 10; - -pub async fn old_shallow( - location: &location::Data, - sub_path: &PathBuf, - library @ Library { - db, - #[cfg(feature = "ai")] - sync, - .. - }: &Library, - #[cfg(feature = "ai")] regenerate_labels: bool, - node: &Node, -) -> Result<(), JobError> { - let location_id = location.id; - let location_path = maybe_missing(&location.path, "location.path").map(PathBuf::from)?; - - let iso_file_path = if sub_path != Path::new("") { - let full_path = ensure_sub_path_is_in_location(&location_path, &sub_path) - .await - .map_err(MediaProcessorError::from)?; - ensure_sub_path_is_directory(&location_path, &sub_path) - .await - .map_err(MediaProcessorError::from)?; - - let sub_iso_file_path = - IsolatedFilePathData::new(location_id, &location_path, &full_path, true) - .map_err(MediaProcessorError::from)?; - - ensure_file_path_exists( - &sub_path, - &sub_iso_file_path, - db, - MediaProcessorError::SubPathNotFound, - ) - .await?; - - sub_iso_file_path - } else { - IsolatedFilePathData::new(location_id, &location_path, &location_path, true) - .map_err(MediaProcessorError::from)? 
- }; - - debug!("Searching for media in location {location_id} at path {iso_file_path}"); - - dispatch_thumbnails_for_processing( - location.id, - &location_path, - &iso_file_path, - library, - node, - false, - ) - .await?; - - let file_paths_to_extract_exif_data = - get_files_for_exif_media_data_extraction(db, &iso_file_path).await?; - let file_paths_to_extract_ffmpeg_data = - get_files_for_ffmpeg_media_data_extraction(db, &iso_file_path).await?; - - #[cfg(feature = "ai")] - let file_paths_for_labelling = - get_files_for_labeling(db, &iso_file_path, regenerate_labels).await?; - - #[cfg(feature = "ai")] - let has_labels = !file_paths_for_labelling.is_empty(); - - let total_files = - file_paths_to_extract_exif_data.len() + file_paths_to_extract_ffmpeg_data.len(); - - let chunked_files_to_extract_exif_data = file_paths_to_extract_exif_data - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .map(Iterator::collect) - .collect::>>(); - - let chunked_files_to_extract_ffmpeg_data = file_paths_to_extract_ffmpeg_data - .into_iter() - .chunks(BATCH_SIZE) - .into_iter() - .map(Iterator::collect) - .collect::>>(); - - debug!( - "Preparing to process {total_files} files in {} chunks", - chunked_files_to_extract_exif_data.len() + chunked_files_to_extract_ffmpeg_data.len() - ); - - #[cfg(feature = "ai")] - // Check if we have an image labeller and has_labels then enqueue a new batch - let labels_rx = node.old_image_labeller.as_ref().and_then(|image_labeller| { - has_labels.then(|| { - image_labeller.new_batch( - location_id, - location_path.clone(), - file_paths_for_labelling, - Arc::clone(db), - sync.clone(), - ) - }) - }); - - let mut run_metadata = OldMediaProcessorMetadata::default(); - - for files in chunked_files_to_extract_exif_data { - let (more_run_metadata, errors) = - exif_metadata_extractor::process(&files, location.id, &location_path, db, &|_| {}) - .await - .map_err(MediaProcessorError::from)?; - - run_metadata.update(more_run_metadata.into()); - - if !errors.is_empty() { - error!("Errors processing chunk of image media data shallow extraction:\n{errors}"); - } - } - - for files in chunked_files_to_extract_ffmpeg_data { - let (more_run_metadata, errors) = - ffmpeg_metadata_extractor::process(&files, location.id, &location_path, db, &|_| {}) - .await - .map_err(MediaProcessorError::from)?; - - run_metadata.update(more_run_metadata.into()); - - if !errors.is_empty() { - error!("Errors processing chunk of audio or video media data shallow extraction:\n{errors}"); - } - } - - debug!("Media shallow processor run metadata: {run_metadata:?}"); - - if run_metadata.exif_data.extracted > 0 || run_metadata.ffmpeg_data.extracted > 0 { - invalidate_query!(library, "search.paths"); - invalidate_query!(library, "search.objects"); - } - - #[cfg(feature = "ai")] - { - if has_labels { - if let Some(labels_rx) = labels_rx { - labels_rx - .await - .for_each( - |LabelerOutput { - file_path_id, - has_new_labels, - result, - }| async move { - if let Err(e) = result { - error!( - "Failed to generate labels : {e:#?}" - ); - } else if has_new_labels { - // invalidate_query!(library, "labels.count"); // TODO: This query doesn't exist on main yet - } - }, - ) - .await; - - invalidate_query!(library, "labels.list"); - invalidate_query!(library, "labels.getForObject"); - invalidate_query!(library, "labels.getWithObjects"); - } - } - } - - Ok(()) -} - -async fn get_files_for_exif_media_data_extraction( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, -) -> Result, MediaProcessorError> { - 
get_files_by_extensions( - db, - parent_iso_file_path, - &exif_metadata_extractor::FILTERED_IMAGE_EXTENSIONS, - ) - .await - .map_err(Into::into) -} - -async fn get_files_for_ffmpeg_media_data_extraction( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, -) -> Result, MediaProcessorError> { - get_files_by_extensions( - db, - parent_iso_file_path, - &ffmpeg_metadata_extractor::FILTERED_AUDIO_AND_VIDEO_EXTENSIONS, - ) - .await - .map_err(Into::into) -} - -#[cfg(feature = "ai")] -async fn get_files_for_labeling( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, - regenerate_labels: bool, -) -> Result, MediaProcessorError> { - // FIXME: Had to use format! macro because PCR doesn't support IN with Vec for SQLite - // We have no data coming from the user, so this is sql injection safe - db._query_raw(raw!( - &format!( - "SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id - FROM file_path f - WHERE - location_id={{}} - AND cas_id IS NOT NULL - AND LOWER(extension) IN ({}) - AND materialized_path = {{}} - {}", - &exif_metadata_extractor::FILTERED_IMAGE_EXTENSIONS - .iter() - .map(|ext| format!("LOWER('{ext}')")) - .collect::>() - .join(","), - if !regenerate_labels { - "AND NOT EXISTS (SELECT 1 FROM label_on_object WHERE object_id = f.object_id)" - } else { - "" - } - ), - PrismaValue::Int(parent_iso_file_path.location_id()), - PrismaValue::String( - parent_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory") - ) - )) - .exec() - .await - .map_err(Into::into) -} - -async fn dispatch_thumbnails_for_processing( - location_id: location::id::Type, - location_path: impl AsRef, - parent_iso_file_path: &IsolatedFilePathData<'_>, - library: &Library, - node: &Node, - should_regenerate: bool, -) -> Result<(), MediaProcessorError> { - let Library { db, .. } = library; - - let location_path = location_path.as_ref(); - - let file_paths = get_files_by_extensions( - db, - parent_iso_file_path, - &old_thumbnail::ALL_THUMBNAILABLE_EXTENSIONS, - ) - .await?; - - let current_batch = file_paths - .into_iter() - .filter_map(|file_path| { - if let Some(cas_id) = file_path.cas_id.as_ref() { - Some((cas_id.clone(), file_path)) - } else { - error!("File path has no cas_id, skipping", file_path.id); - None - } - }) - .filter_map(|(cas_id, file_path)| { - let file_path_id = file_path.id; - IsolatedFilePathData::try_from((location_id, file_path)) - .map_err(|e| { - error!("Failed to extract isolated file path data from file path : {e:#?}"); - }) - .ok() - .map(|iso_file_path| (cas_id, iso_file_path)) - }) - .map(|(cas_id, iso_file_path)| { - let full_path = location_path.join(&iso_file_path); - - GenerateThumbnailArgs::new(iso_file_path.extension().to_string(), cas_id, full_path) - }) - .collect::>(); - - // Let's not send an empty batch lol - if !current_batch.is_empty() { - node.thumbnailer - .new_indexed_thumbnails_batch( - BatchToProcess::new(current_batch, should_regenerate, false), - library.id, - ) - .await; - } - - Ok(()) -} - -async fn get_files_by_extensions( - db: &PrismaClient, - parent_iso_file_path: &IsolatedFilePathData<'_>, - extensions: &[Extension], -) -> Result, MediaProcessorError> { - // FIXME: Had to use format! 
macro because PCR doesn't support IN with Vec for SQLite - // We have no data coming from the user, so this is sql injection safe - db._query_raw(raw!( - &format!( - "SELECT id, materialized_path, is_dir, name, extension, cas_id, object_id - FROM file_path - WHERE - location_id={{}} - AND cas_id IS NOT NULL - AND LOWER(extension) IN ({}) - AND materialized_path = {{}}", - extensions - .iter() - .map(|ext| format!("LOWER('{ext}')")) - .collect::>() - .join(",") - ), - PrismaValue::Int(parent_iso_file_path.location_id()), - PrismaValue::String( - parent_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory") - ) - )) - .exec() - .await - .map_err(Into::into) -} diff --git a/core/src/object/media/old_thumbnail/clean_up.rs b/core/src/object/media/old_thumbnail/clean_up.rs index 70a4c7f4d104..fcfee9a2a204 100644 --- a/core/src/object/media/old_thumbnail/clean_up.rs +++ b/core/src/object/media/old_thumbnail/clean_up.rs @@ -11,6 +11,8 @@ use tracing::{debug, error}; use super::{ThumbnailerError, EPHEMERAL_DIR, WEBP_EXTENSION}; +// TODO(fogodev) Introduce a task using the new task system to clean up the thumbnails from time to time. + pub(super) async fn process_ephemeral_clean_up( thumbnails_directory: Arc, existing_ephemeral_thumbs: HashSet, diff --git a/core/src/object/media/old_thumbnail/directory.rs b/core/src/object/media/old_thumbnail/directory.rs index 38db7adf54f8..1360aaed3ebd 100644 --- a/core/src/object/media/old_thumbnail/directory.rs +++ b/core/src/object/media/old_thumbnail/directory.rs @@ -27,6 +27,8 @@ use super::{ VERSION_FILE, WEBP_EXTENSION, }; +// TODO(fogodev): Move this logic to be used alongside the NodeConfig or other Node part to run at app startup + #[derive( IntEnum, Debug, Clone, Copy, Eq, PartialEq, strum::Display, Serialize_repr, Deserialize_repr, )] diff --git a/core/src/object/media/old_thumbnail/mod.rs b/core/src/object/media/old_thumbnail/mod.rs deleted file mode 100644 index 604f7c750ead..000000000000 --- a/core/src/object/media/old_thumbnail/mod.rs +++ /dev/null @@ -1,192 +0,0 @@ -// use crate::{library::LibraryId, util::version_manager::VersionManagerError, Node}; - -// use sd_file_ext::extensions::{ -// DocumentExtension, Extension, ImageExtension, ALL_DOCUMENT_EXTENSIONS, ALL_IMAGE_EXTENSIONS, -// }; -// use sd_utils::error::FileIOError; - -// #[cfg(feature = "ffmpeg")] -// use sd_file_ext::extensions::{VideoExtension, ALL_VIDEO_EXTENSIONS}; - -// use std::{ -// path::{Path, PathBuf}, -// time::Duration, -// }; - -// use once_cell::sync::Lazy; -// use serde::{Deserialize, Serialize}; -// use thiserror::Error; -// use tokio::task; -// use tracing::error; - -// mod clean_up; -// mod directory; -// pub mod old_actor; -// pub mod preferences; -// mod process; -// mod shard; -// mod state; -// mod worker; - -// pub use process::{BatchToProcess, GenerateThumbnailArgs}; -// pub use shard::get_shard_hex; - -// use directory::ThumbnailVersion; - -// // Files names constants -// const THUMBNAIL_CACHE_DIR_NAME: &str = "thumbnails"; -// const SAVE_STATE_FILE: &str = "thumbs_to_process.bin"; -// const VERSION_FILE: &str = "version.txt"; -// pub const WEBP_EXTENSION: &str = "webp"; -// const EPHEMERAL_DIR: &str = "ephemeral"; - -// /// This is the target pixel count for all thumbnails to be resized to, and it is eventually downscaled -// /// to [`TARGET_QUALITY`]. 
-// const TARGET_PX: f32 = 1048576.0; // 1024x1024 - -// /// This is the target quality that we render thumbnails at, it is a float between 0-100 -// /// and is treated as a percentage (so 60% in this case, or it's the same as multiplying by `0.6`). -// const TARGET_QUALITY: f32 = 60.0; - -// // Some time constants -// const ONE_SEC: Duration = Duration::from_secs(1); -// const THIRTY_SECS: Duration = Duration::from_secs(30); -// const HALF_HOUR: Duration = Duration::from_secs(30 * 60); - -// #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)] -// pub enum ThumbnailKind { -// Ephemeral, -// Indexed(LibraryId), -// } - -// pub fn get_indexed_thumbnail_path(node: &Node, cas_id: &str, library_id: LibraryId) -> PathBuf { -// get_thumbnail_path(node, cas_id, ThumbnailKind::Indexed(library_id)) -// } - -// pub fn get_ephemeral_thumbnail_path(node: &Node, cas_id: &str) -> PathBuf { -// get_thumbnail_path(node, cas_id, ThumbnailKind::Ephemeral) -// } - -// /// This does not check if a thumbnail exists, it just returns the path that it would exist at -// fn get_thumbnail_path(node: &Node, cas_id: &str, kind: ThumbnailKind) -> PathBuf { -// let mut thumb_path = node.config.data_directory(); - -// thumb_path.push(THUMBNAIL_CACHE_DIR_NAME); -// match kind { -// ThumbnailKind::Ephemeral => thumb_path.push(EPHEMERAL_DIR), -// ThumbnailKind::Indexed(library_id) => { -// thumb_path.push(library_id.to_string()); -// } -// } -// thumb_path.push(get_shard_hex(cas_id)); -// thumb_path.push(cas_id); -// thumb_path.set_extension(WEBP_EXTENSION); - -// thumb_path -// } - -// pub fn get_indexed_thumb_key(cas_id: &str, library_id: LibraryId) -> Vec { -// get_thumb_key(cas_id, ThumbnailKind::Indexed(library_id)) -// } - -// pub fn get_ephemeral_thumb_key(cas_id: &str) -> Vec { -// get_thumb_key(cas_id, ThumbnailKind::Ephemeral) -// } - -// // this is used to pass the relevant data to the frontend so it can request the thumbnail -// // it supports extending the shard hex to support deeper directory structures in the future -// fn get_thumb_key(cas_id: &str, kind: ThumbnailKind) -> Vec { -// vec![ -// match kind { -// ThumbnailKind::Ephemeral => String::from(EPHEMERAL_DIR), -// ThumbnailKind::Indexed(library_id) => library_id.to_string(), -// }, -// get_shard_hex(cas_id).to_string(), -// cas_id.to_string(), -// ] -// } - -// #[cfg(feature = "ffmpeg")] -// pub(super) static THUMBNAILABLE_VIDEO_EXTENSIONS: Lazy> = Lazy::new(|| { -// ALL_VIDEO_EXTENSIONS -// .iter() -// .cloned() -// .filter(can_generate_thumbnail_for_video) -// .map(Extension::Video) -// .collect() -// }); - -// pub(super) static THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { -// ALL_IMAGE_EXTENSIONS -// .iter() -// .cloned() -// .filter(can_generate_thumbnail_for_image) -// .map(Extension::Image) -// .chain( -// ALL_DOCUMENT_EXTENSIONS -// .iter() -// .cloned() -// .filter(can_generate_thumbnail_for_document) -// .map(Extension::Document), -// ) -// .collect() -// }); - -// pub(super) static ALL_THUMBNAILABLE_EXTENSIONS: Lazy> = Lazy::new(|| { -// #[cfg(feature = "ffmpeg")] -// return THUMBNAILABLE_EXTENSIONS -// .iter() -// .cloned() -// .chain(THUMBNAILABLE_VIDEO_EXTENSIONS.iter().cloned()) -// .collect(); - -// #[cfg(not(feature = "ffmpeg"))] -// THUMBNAILABLE_EXTENSIONS.clone() -// }); - -// #[derive(Error, Debug)] -// pub enum ThumbnailerError { -// // Internal errors -// #[error("database error: {0}")] -// Database(#[from] prisma_client_rust::QueryError), -// #[error(transparent)] -// FileIO(#[from] FileIOError), -// 
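To make the two constants above concrete: one plausible way for a thumbnailer to apply TARGET_PX is to scale both sides by sqrt(TARGET_PX / (w * h)), so the output holds roughly a megapixel while keeping the aspect ratio. This is illustrative only; the real resize code lives in the worker/process modules, which this hunk does not include.

const TARGET_PX: f32 = 1_048_576.0; // 1024 * 1024, as in the constant above

fn thumbnail_dimensions(width: u32, height: u32) -> (u32, u32) {
    let px = width as f32 * height as f32;
    if px <= TARGET_PX {
        return (width, height); // already small enough, never upscale
    }
    let scale = (TARGET_PX / px).sqrt();
    (
        (width as f32 * scale).round() as u32,
        (height as f32 * scale).round() as u32,
    )
}

fn main() {
    let (w, h) = thumbnail_dimensions(4000, 3000);
    // 4000x3000 comes out at roughly one megapixel with the 4:3 ratio preserved.
    assert!((w * h) as f32 <= TARGET_PX * 1.01);
    assert!(((w as f32 / h as f32) - 4.0 / 3.0).abs() < 0.01);
}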
#[error(transparent)] -// VersionManager(#[from] VersionManagerError), -// #[error("failed to encode webp")] -// WebPEncoding { path: Box, reason: String }, -// #[error("error while converting the image")] -// SdImages { -// path: Box, -// error: sd_images::Error, -// }, -// #[error("failed to execute converting task: {0}")] -// Task(#[from] task::JoinError), -// #[cfg(feature = "ffmpeg")] -// #[error(transparent)] -// FFmpeg(#[from] sd_ffmpeg::Error), -// #[error("thumbnail generation timed out for {}", .0.display())] -// TimedOut(Box), -// } - -// #[cfg(feature = "ffmpeg")] -// pub const fn can_generate_thumbnail_for_video(video_extension: &VideoExtension) -> bool { -// use VideoExtension::*; -// // File extensions that are specifically not supported by the thumbnailer -// !matches!(video_extension, Mpg | Swf | M2v | Hevc | M2ts | Mts | Ts) -// } - -// pub const fn can_generate_thumbnail_for_image(image_extension: &ImageExtension) -> bool { -// use ImageExtension::*; - -// matches!( -// image_extension, -// Jpg | Jpeg | Png | Webp | Gif | Svg | Heic | Heics | Heif | Heifs | Avif | Bmp | Ico -// ) -// } - -// pub const fn can_generate_thumbnail_for_document(document_extension: &DocumentExtension) -> bool { -// use DocumentExtension::*; - -// matches!(document_extension, Pdf) -// } diff --git a/core/src/object/media/old_thumbnail/old_actor.rs b/core/src/object/media/old_thumbnail/old_actor.rs deleted file mode 100644 index 8996c81f355b..000000000000 --- a/core/src/object/media/old_thumbnail/old_actor.rs +++ /dev/null @@ -1,335 +0,0 @@ -use crate::{ - api::CoreEvent, - library::{Libraries, LibraryId, LibraryManagerEvent}, - node::config::NodePreferences, -}; - -use sd_prisma::prisma::{location, PrismaClient}; -use sd_utils::error::{FileIOError, NonUtf8PathError}; - -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; - -use async_channel as chan; -use once_cell::sync::OnceCell; -use thiserror::Error; -use tokio::{ - fs, spawn, - sync::{broadcast, oneshot, watch, Mutex}, - time::{sleep, Instant}, -}; -use tracing::{error, trace}; -use uuid::Uuid; - -use super::{ - directory::init_thumbnail_dir, - process::{generate_thumbnail, ThumbData}, - state::RegisterReporter, - worker::{old_worker, WorkerChannels}, - BatchToProcess, ThumbnailKind, ThumbnailerError, ONE_SEC, THUMBNAIL_CACHE_DIR_NAME, -}; - -static AVAILABLE_PARALLELISM: OnceCell = OnceCell::new(); - -#[derive(Error, Debug)] -pub(super) enum ActorError { - #[error("database error")] - Database(#[from] prisma_client_rust::QueryError), - #[error(transparent)] - FileIO(#[from] FileIOError), - #[error(transparent)] - NonUtf8Path(#[from] NonUtf8PathError), -} - -#[derive(Debug)] -pub(super) enum DatabaseMessage { - Add(Uuid, Arc), - Update(Uuid, Arc), - Remove(Uuid), -} - -// Thumbnails directory have the following structure: -// thumbnails/ -// ├── version.txt -// ├── thumbs_to_process.bin # processing save state -// ├── ephemeral/ # ephemeral ones have it's own directory -// │ └── [0..3]/ # sharding -// │ └── .webp -// └── / # we segregate thumbnails by library -// └── [0..3]/ # sharding -// └── .webp -pub struct OldThumbnailer { - thumbnails_directory: Arc, - cas_ids_to_delete_tx: chan::Sender<(Vec, ThumbnailKind)>, - thumbnails_to_generate_tx: chan::Sender<(BatchToProcess, ThumbnailKind)>, - progress_reporter_tx: chan::Sender, - last_single_thumb_generated: Mutex, - reporter: broadcast::Sender, - cancel_tx: chan::Sender>, -} - -impl OldThumbnailer { - pub async fn new( - data_dir: impl AsRef, - libraries_manager: Arc, - reporter: 
broadcast::Sender, - node_preferences_rx: watch::Receiver, - ) -> Self { - let data_dir = data_dir.as_ref(); - let thumbnails_directory = Arc::new( - init_thumbnail_dir(data_dir, Arc::clone(&libraries_manager)) - .await - .unwrap_or_else(|e| { - error!("Failed to initialize thumbnail directory: {e:#?}"); - data_dir.join(THUMBNAIL_CACHE_DIR_NAME) - }), - ); - - let (progress_management_tx, progress_management_rx) = chan::bounded(16); - - let (databases_tx, databases_rx) = chan::bounded(4); - let (thumbnails_to_generate_tx, ephemeral_thumbnails_to_generate_rx) = chan::unbounded(); - let (cas_ids_to_delete_tx, cas_ids_to_delete_rx) = chan::bounded(16); - let (cancel_tx, cancel_rx) = chan::bounded(1); - - AVAILABLE_PARALLELISM - .set(std::thread::available_parallelism().map_or_else( - |e| { - error!("Failed to get available parallelism: {e:#?}"); - 4 - }, - |non_zero| non_zero.get(), - )) - .ok(); - - spawn({ - let progress_management_rx = progress_management_rx.clone(); - let cancel_rx = cancel_rx.clone(); - let thumbnails_directory = Arc::clone(&thumbnails_directory); - let reporter = reporter.clone(); - let node_preferences = node_preferences_rx.clone(); - - async move { - while let Err(e) = spawn(old_worker( - *AVAILABLE_PARALLELISM - .get() - .expect("BATCH_SIZE is set at thumbnailer new method"), - node_preferences.clone(), - reporter.clone(), - thumbnails_directory.clone(), - WorkerChannels { - progress_management_rx: progress_management_rx.clone(), - databases_rx: databases_rx.clone(), - cas_ids_to_delete_rx: cas_ids_to_delete_rx.clone(), - thumbnails_to_generate_rx: ephemeral_thumbnails_to_generate_rx.clone(), - cancel_rx: cancel_rx.clone(), - }, - )) - .await - { - error!( - "Error on Thumbnail Remover Actor; \ - Error: {e}; \ - Restarting the worker loop...", - ); - } - } - }); - - spawn({ - let rx = libraries_manager.rx.clone(); - let thumbnails_directory = Arc::clone(&thumbnails_directory); - - async move { - let subscribe_res = rx - .subscribe(|event| { - let databases_tx = databases_tx.clone(); - - let thumbnails_directory = &thumbnails_directory; - - async move { - match event { - LibraryManagerEvent::Load(library) => { - let library_dir = - thumbnails_directory.join(library.id.to_string()); - - if let Err(e) = fs::create_dir_all(&library_dir).await { - error!( - "Failed to create library dir for thumbnails: {:#?}", - FileIOError::from((library_dir, e)) - ); - } - - databases_tx - .send(DatabaseMessage::Add( - library.id, - Arc::clone(&library.db), - )) - .await - .expect("critical thumbnailer error: databases channel closed on send add") - } - - LibraryManagerEvent::Edit(library) - | LibraryManagerEvent::InstancesModified(library) => databases_tx - .send(DatabaseMessage::Update( - library.id, - Arc::clone(&library.db), - )) - .await - .expect("critical thumbnailer error: databases channel closed on send update"), - - LibraryManagerEvent::Delete(library) => databases_tx - .send(DatabaseMessage::Remove(library.id)) - .await - .expect("critical thumbnailer error: databases channel closed on send delete"), - } - } - }) - .await; - - if subscribe_res.is_err() { - error!("Thumbnailer actor has crashed...") - } - } - }); - - Self { - thumbnails_directory, - cas_ids_to_delete_tx, - thumbnails_to_generate_tx, - progress_reporter_tx: progress_management_tx, - last_single_thumb_generated: Mutex::new(Instant::now()), - reporter, - cancel_tx, - } - } - - #[inline] - async fn new_batch(&self, batch: BatchToProcess, kind: ThumbnailKind) { - if !batch.batch.is_empty() { - 
self.thumbnails_to_generate_tx - .send((batch, kind)) - .await - .expect("critical thumbnailer error: failed to send new batch"); - } else { - trace!("Empty batch received, skipping..."); - } - } - - #[inline] - pub async fn new_ephemeral_thumbnails_batch(&self, batch: BatchToProcess) { - self.new_batch(batch, ThumbnailKind::Ephemeral).await - } - - #[inline] - pub async fn new_indexed_thumbnails_batch(&self, batch: BatchToProcess, library_id: LibraryId) { - self.new_batch(batch, ThumbnailKind::Indexed(library_id)) - .await - } - - #[inline] - pub async fn new_indexed_thumbnails_tracked_batch( - &self, - mut batch: BatchToProcess, - library_id: LibraryId, - location_id: location::id::Type, - ) { - batch.location_id = Some(location_id); - - self.new_batch(batch, ThumbnailKind::Indexed(library_id)) - .await; - } - - #[inline] - pub async fn register_reporter( - &self, - location_id: location::id::Type, - progress_tx: chan::Sender<(u32, u32)>, - ) { - self.progress_reporter_tx - .send((location_id, progress_tx)) - .await - .expect("critical thumbnailer error: failed to send register reporter fn"); - } - - #[inline] - async fn remove_cas_ids(&self, cas_ids: Vec, kind: ThumbnailKind) { - self.cas_ids_to_delete_tx - .send((cas_ids, kind)) - .await - .expect("critical thumbnailer error: failed to send cas ids to delete"); - } - - #[inline] - pub async fn remove_ephemeral_cas_ids(&self, cas_ids: Vec) { - self.remove_cas_ids(cas_ids, ThumbnailKind::Ephemeral).await - } - - #[inline] - pub async fn remove_indexed_cas_ids(&self, cas_ids: Vec, library_id: LibraryId) { - self.remove_cas_ids(cas_ids, ThumbnailKind::Indexed(library_id)) - .await - } - - #[inline] - pub async fn shutdown(&self) { - let (tx, rx) = oneshot::channel(); - self.cancel_tx - .send(tx) - .await - .expect("critical thumbnailer error: failed to send shutdown signal"); - - rx.await - .expect("critical thumbnailer error: failed to receive shutdown signal response"); - } - - /// WARNING!!!! DON'T USE THIS METHOD IN A LOOP!!!!!!!!!!!!! It will be pretty slow on purpose! 
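// Illustrative sketch, not part of the original code: the single-thumbnail method
// below deliberately paces itself to roughly one request per second by sleeping
// off whatever is left of the interval since the last run. The same pattern in
// isolation (`throttle_once_per` is a hypothetical name; assumes tokio's `time`
// and `sync` features):
async fn throttle_once_per(
    last_run: &tokio::sync::Mutex<tokio::time::Instant>,
    min_interval: std::time::Duration,
) {
    let mut last = last_run.lock().await;
    let elapsed = tokio::time::Instant::now() - *last;
    if elapsed < min_interval {
        // Sleep only for the remainder, so callers are paced but never starved.
        tokio::time::sleep(min_interval - elapsed).await;
    }
    *last = tokio::time::Instant::now();
}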
- pub async fn generate_single_indexed_thumbnail( - &self, - extension: &str, - cas_id: String, - path: impl AsRef, - library_id: LibraryId, - ) -> Result<(), ThumbnailerError> { - self.generate_single_thumbnail(extension, cas_id, path, ThumbnailKind::Indexed(library_id)) - .await - } - - async fn generate_single_thumbnail( - &self, - extension: &str, - cas_id: String, - path: impl AsRef, - kind: ThumbnailKind, - ) -> Result<(), ThumbnailerError> { - let mut last_single_thumb_generated_guard = self.last_single_thumb_generated.lock().await; - - let elapsed = Instant::now() - *last_single_thumb_generated_guard; - if elapsed < ONE_SEC { - // This will choke up in case someone try to use this method in a loop, otherwise - // it will consume all the machine resources like a gluton monster from hell - sleep(ONE_SEC - elapsed).await; - } - - let res = generate_thumbnail( - self.thumbnails_directory.as_ref().clone(), - ThumbData { - extension, - cas_id, - path, - in_background: false, - should_regenerate: false, - kind, - }, - self.reporter.clone(), - ) - .await - .map(|_| ()); - - *last_single_thumb_generated_guard = Instant::now(); - - res - } -} diff --git a/core/src/object/media/old_thumbnail/preferences.rs b/core/src/object/media/old_thumbnail/preferences.rs deleted file mode 100644 index 39c116e0c834..000000000000 --- a/core/src/object/media/old_thumbnail/preferences.rs +++ /dev/null @@ -1,34 +0,0 @@ -use serde::{Deserialize, Serialize}; -use specta::Type; - -#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq, Type)] -pub struct ThumbnailerPreferences { - background_processing_percentage: u8, // 0-100 -} - -impl Default for ThumbnailerPreferences { - fn default() -> Self { - Self { - background_processing_percentage: 50, // 50% of CPU cores available - } - } -} - -impl ThumbnailerPreferences { - pub fn background_processing_percentage(&self) -> u8 { - self.background_processing_percentage - } - - pub fn set_background_processing_percentage( - &mut self, - mut background_processing_percentage: u8, - ) -> &mut Self { - if background_processing_percentage > 100 { - background_processing_percentage = 100; - } - - self.background_processing_percentage = background_processing_percentage; - - self - } -} diff --git a/core/src/object/media/old_thumbnail/process.rs b/core/src/object/media/old_thumbnail/process.rs deleted file mode 100644 index 136680551c08..000000000000 --- a/core/src/object/media/old_thumbnail/process.rs +++ /dev/null @@ -1,483 +0,0 @@ -use crate::api::CoreEvent; - -use sd_file_ext::extensions::{DocumentExtension, ImageExtension}; -use sd_images::{format_image, scale_dimensions, ConvertibleExtension}; -use sd_media_metadata::exif::Orientation; -use sd_prisma::prisma::location; -use sd_utils::error::FileIOError; - -use std::{ - collections::VecDeque, - ffi::OsString, - ops::Deref, - path::{Path, PathBuf}, - str::FromStr, - sync::Arc, -}; - -use async_channel as chan; -use futures_concurrency::future::{Join, Race}; -use image::{imageops, DynamicImage, GenericImageView}; -use serde::{Deserialize, Serialize}; -use tokio::{ - fs, io, - sync::{broadcast, oneshot, Semaphore}, - task::{spawn, spawn_blocking}, - time::timeout, -}; -use tokio_stream::StreamExt; -use tracing::{debug, error, trace, warn}; -use webp::Encoder; - -use super::{ - can_generate_thumbnail_for_document, can_generate_thumbnail_for_image, get_thumb_key, - preferences::ThumbnailerPreferences, shard::get_shard_hex, ThumbnailKind, ThumbnailerError, - EPHEMERAL_DIR, TARGET_PX, TARGET_QUALITY, THIRTY_SECS, 
WEBP_EXTENSION, -}; - -#[derive(Debug, Serialize, Deserialize)] -pub struct GenerateThumbnailArgs { - pub extension: String, - pub cas_id: String, - pub path: PathBuf, -} - -impl GenerateThumbnailArgs { - pub fn new(extension: String, cas_id: String, path: PathBuf) -> Self { - Self { - extension, - cas_id, - path, - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct BatchToProcess { - pub(super) batch: Vec, - pub(super) should_regenerate: bool, - pub(super) in_background: bool, - pub(super) location_id: Option, -} - -impl BatchToProcess { - pub fn new( - batch: Vec, - should_regenerate: bool, - in_background: bool, - ) -> Self { - Self { - batch, - should_regenerate, - in_background, - location_id: None, - } - } -} - -pub(super) struct ProcessorControlChannels { - pub stop_rx: chan::Receiver>, - pub done_tx: oneshot::Sender<()>, - pub batch_report_progress_tx: chan::Sender<(location::id::Type, u32)>, -} - -pub(super) async fn batch_processor( - thumbnails_directory: Arc, - ( - BatchToProcess { - batch, - should_regenerate, - in_background, - location_id, - }, - kind, - ): (BatchToProcess, ThumbnailKind), - generated_ephemeral_thumbs_file_names_tx: chan::Sender>, - ProcessorControlChannels { - stop_rx, - done_tx, - batch_report_progress_tx, - }: ProcessorControlChannels, - leftovers_tx: chan::Sender<(BatchToProcess, ThumbnailKind)>, - reporter: broadcast::Sender, - (available_parallelism, thumbnailer_preferences): (usize, ThumbnailerPreferences), -) { - let in_parallel_count = if !in_background { - available_parallelism - } else { - usize::max( - // If the user sets the background processing percentage to 0, we still want to process at least sequentially - thumbnailer_preferences.background_processing_percentage() as usize - * available_parallelism - / 100, - 1, - ) - }; - - debug!( - "Processing thumbnails batch of kind {kind:?} with size {} in {}, \ - at most {in_parallel_count} thumbnails at a time", - batch.len(), - if in_background { - "background" - } else { - "foreground" - }, - ); - - let semaphore = Arc::new(Semaphore::new(in_parallel_count)); - - let batch_size = batch.len(); - - // Transforming to `VecDeque` so we don't need to move anything as we consume from the beginning - // This from is guaranteed to be O(1) - let mut queue = VecDeque::from(batch); - - enum RaceOutputs { - Processed, - Stop(oneshot::Sender<()>), - } - - let (maybe_cas_ids_tx, maybe_cas_ids_rx) = if kind == ThumbnailKind::Ephemeral { - let (tx, rx) = chan::bounded(batch_size); - (Some(tx), Some(rx)) - } else { - (None, None) - }; - - let maybe_stopped_tx = if let RaceOutputs::Stop(stopped_tx) = ( - async { - let mut join_handles = Vec::with_capacity(batch_size); - - while !queue.is_empty() { - let permit = Arc::clone(&semaphore) - .acquire_owned() - .await - .expect("this semaphore never closes"); - - let GenerateThumbnailArgs { - extension, - cas_id, - path, - } = queue.pop_front().expect("queue is not empty"); - - // As we got a permit, then there is available CPU to process this thumbnail - join_handles.push(spawn({ - let reporter = reporter.clone(); - let thumbnails_directory = thumbnails_directory.as_ref().clone(); - let report_progress_tx = batch_report_progress_tx.clone(); - let maybe_cas_ids_tx = maybe_cas_ids_tx.clone(); - - async move { - let res = timeout(THIRTY_SECS, async { - generate_thumbnail( - thumbnails_directory, - ThumbData { - extension: &extension, - cas_id, - path: &path, - in_background, - should_regenerate, - kind, - }, - reporter, - ) - .await - .map(|cas_id| { - // 
this send_blocking never blocks as we have a bounded channel with - // the same capacity as the batch size, so there is always a space - // in the queue - if let Some(cas_ids_tx) = maybe_cas_ids_tx { - if cas_ids_tx - .send_blocking(OsString::from(format!("{}.webp", cas_id))) - .is_err() - { - warn!("No one to listen to generated ephemeral thumbnail cas id"); - } - } - }) - }) - .await - .unwrap_or_else(|_| { - Err(ThumbnailerError::TimedOut(path.into_boxed_path())) - }); - - if let Some(location_id) = location_id { - report_progress_tx.send((location_id, 1)).await.ok(); - } - - drop(permit); - - res - } - })); - } - - for res in join_handles.join().await { - match res { - Ok(Ok(())) => { /* Everything is awesome! */ } - Ok(Err(e)) => { - error!( - "Failed to generate thumbnail for {} location: {e:#?}", - if let ThumbnailKind::Ephemeral = kind { - "ephemeral" - } else { - "indexed" - } - ) - } - Err(e) => { - error!("Failed to join thumbnail generation task: {e:#?}"); - } - } - } - - if let Some(cas_ids_tx) = &maybe_cas_ids_tx { - cas_ids_tx.close(); - } - - trace!("Processed batch with {batch_size} thumbnails"); - - RaceOutputs::Processed - }, - async { - let tx = stop_rx - .recv() - .await - .expect("Critical error on thumbnails actor"); - trace!("Received a stop signal"); - RaceOutputs::Stop(tx) - }, - ) - .race() - .await - { - // Our queue is always contiguous, so this `from` is free - let leftovers = Vec::from(queue); - - trace!( - "Stopped with {} thumbnails left to process", - leftovers.len() - ); - if !leftovers.is_empty() - && leftovers_tx - .send(( - BatchToProcess { - batch: leftovers, - should_regenerate, - in_background: true, // Leftovers should always be in background - location_id, - }, - kind, - )) - .await - .is_err() - { - error!("Thumbnail actor is dead: Failed to send leftovers") - } - - if let Some(cas_ids_tx) = &maybe_cas_ids_tx { - cas_ids_tx.close(); - } - - Some(stopped_tx) - } else { - None - }; - - if let Some(cas_ids_rx) = maybe_cas_ids_rx { - if generated_ephemeral_thumbs_file_names_tx - .send(cas_ids_rx.collect().await) - .await - .is_err() - { - error!("Thumbnail actor is dead: Failed to send generated cas ids") - } - } - - if let Some(stopped_tx) = maybe_stopped_tx { - stopped_tx.send(()).ok(); - } else { - trace!("Finished batch!"); - } - - done_tx.send(()).ok(); -} - -pub(super) struct ThumbData<'ext, P: AsRef> { - pub extension: &'ext str, - pub cas_id: String, - pub path: P, - pub in_background: bool, - pub should_regenerate: bool, - pub kind: ThumbnailKind, -} - -pub(super) async fn generate_thumbnail( - thumbnails_directory: PathBuf, - ThumbData { - extension, - cas_id, - path, - in_background, - should_regenerate, - kind, - }: ThumbData<'_, impl AsRef>, - reporter: broadcast::Sender, -) -> Result { - let path = path.as_ref(); - trace!("Generating thumbnail for {}", path.display()); - - let mut output_path = thumbnails_directory; - match kind { - ThumbnailKind::Ephemeral => output_path.push(EPHEMERAL_DIR), - ThumbnailKind::Indexed(library_id) => output_path.push(library_id.to_string()), - }; - output_path.push(get_shard_hex(&cas_id)); - output_path.push(&cas_id); - output_path.set_extension(WEBP_EXTENSION); - - if let Err(e) = fs::metadata(&output_path).await { - if e.kind() != io::ErrorKind::NotFound { - error!( - "Failed to check if thumbnail exists, but we will try to generate it anyway: {e:#?}" - ); - } - // Otherwise we good, thumbnail doesn't exist so we can generate it - } else if !should_regenerate { - trace!( - "Skipping thumbnail generation 
for {} because it already exists", - path.display() - ); - return Ok(cas_id); - } - - if let Ok(extension) = ImageExtension::from_str(extension) { - if can_generate_thumbnail_for_image(&extension) { - generate_image_thumbnail(&path, &output_path).await?; - } - } else if let Ok(extension) = DocumentExtension::from_str(extension) { - if can_generate_thumbnail_for_document(&extension) { - generate_image_thumbnail(&path, &output_path).await?; - } - } - - #[cfg(feature = "ffmpeg")] - { - use crate::object::media::old_thumbnail::can_generate_thumbnail_for_video; - use sd_file_ext::extensions::VideoExtension; - - if let Ok(extension) = VideoExtension::from_str(extension) { - if can_generate_thumbnail_for_video(&extension) { - generate_video_thumbnail(&path, &output_path).await?; - } - } - } - // This if is REALLY needed, due to the sheer performance of the thumbnailer, - // I restricted to only send events notifying for thumbnails in the current - // opened directory, sending events for the entire location turns into a - // humongous bottleneck in the frontend lol, since it doesn't even knows - // what to do with thumbnails for inner directories lol - // - fogodev - if !in_background { - trace!("Emitting new thumbnail event"); - if reporter - .send(CoreEvent::NewThumbnail { - thumb_key: get_thumb_key(&cas_id, kind), - }) - .is_err() - { - warn!("Error sending event to Node's event bus"); - } - } - - trace!("Generated thumbnail for {}", path.display()); - - Ok(cas_id) -} - -async fn generate_image_thumbnail( - file_path: impl AsRef, - output_path: impl AsRef, -) -> Result<(), ThumbnailerError> { - let file_path = file_path.as_ref().to_path_buf(); - - let webp = spawn_blocking(move || -> Result<_, ThumbnailerError> { - let mut img = format_image(&file_path).map_err(|e| ThumbnailerError::SdImages { - path: file_path.clone().into_boxed_path(), - error: e, - })?; - - let (w, h) = img.dimensions(); - let (w_scaled, h_scaled) = scale_dimensions(w as f32, h as f32, TARGET_PX); - - // Optionally, resize the existing photo and convert back into DynamicImage - if w != w_scaled && h != h_scaled { - img = DynamicImage::ImageRgba8(imageops::resize( - &img, - w_scaled, - h_scaled, - imageops::FilterType::Triangle, - )); - } - - // this corrects the rotation/flip of the image based on the *available* exif data - // not all images have exif data, so we don't error. we also don't rotate HEIF as that's against the spec - if let Some(orientation) = Orientation::from_path(&file_path) { - if ConvertibleExtension::try_from(file_path.as_ref()) - .expect("we already checked if the image was convertible") - .should_rotate() - { - img = orientation.correct_thumbnail(img); - } - } - - // Create the WebP encoder for the above image - let encoder = - Encoder::from_image(&img).map_err(|reason| ThumbnailerError::WebPEncoding { - path: file_path.into_boxed_path(), - reason: reason.to_string(), - })?; - - // Type WebPMemory is !Send, which makes the Future in this function !Send, - // this make us `deref` to have a `&[u8]` and then `to_owned` to make a Vec - // which implies on a unwanted clone... 
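// Illustrative aside, not part of the original code: the comment above explains
// why the encode runs inside `spawn_blocking` and comes back as an owned
// `Vec<u8>` — the WebP buffer type is !Send, so only plain bytes may cross back
// to the async side. In isolation the pattern looks like this, where
// `encode_to_owned_bytes` is a hypothetical stand-in for the WebP encode done
// in this closure:
//
//     let bytes: Vec<u8> =
//         tokio::task::spawn_blocking(move || encode_to_owned_bytes(&img)).await?;
//
// The `?` handles the `JoinError` from the blocking task; the copy into a `Vec`
// is the price paid for keeping the surrounding future `Send`.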
- Ok(encoder.encode(TARGET_QUALITY).deref().to_owned()) - }) - .await??; - - let output_path = output_path.as_ref(); - - if let Some(shard_dir) = output_path.parent() { - fs::create_dir_all(shard_dir) - .await - .map_err(|e| FileIOError::from((shard_dir, e)))?; - } else { - error!( - "Failed to get parent directory of '{}' for sharding parent directory", - output_path.display() - ); - } - - fs::write(output_path, &webp) - .await - .map_err(|e| FileIOError::from((output_path, e))) - .map_err(Into::into) -} - -#[cfg(feature = "ffmpeg")] -async fn generate_video_thumbnail( - file_path: impl AsRef + Send, - output_path: impl AsRef + Send, -) -> Result<(), ThumbnailerError> { - use sd_ffmpeg::{to_thumbnail, ThumbnailSize}; - - to_thumbnail( - file_path, - output_path, - ThumbnailSize::Scale(1024), - TARGET_QUALITY, - ) - .await - .map_err(Into::into) -} diff --git a/core/src/object/media/old_thumbnail/shard.rs b/core/src/object/media/old_thumbnail/shard.rs deleted file mode 100644 index be61e2034162..000000000000 --- a/core/src/object/media/old_thumbnail/shard.rs +++ /dev/null @@ -1,13 +0,0 @@ -/// The practice of dividing files into hex coded folders, often called "sharding," -/// is mainly used to optimize file system performance. File systems can start to slow down -/// as the number of files in a directory increases. Thus, it's often beneficial to split -/// files into multiple directories to avoid this performance degradation. - -/// `get_shard_hex` takes a cas_id (a hexadecimal hash) as input and returns the first -/// three characters of the hash as the directory name. Because we're using these first -/// three characters of a the hash, this will give us 4096 (16^3) possible directories, -/// named 000 to fff. -pub fn get_shard_hex(cas_id: &str) -> &str { - // Use the first three characters of the hash as the directory name - &cas_id[0..3] -} diff --git a/core/src/object/media/old_thumbnail/state.rs b/core/src/object/media/old_thumbnail/state.rs deleted file mode 100644 index 7cce8d1df8c3..000000000000 --- a/core/src/object/media/old_thumbnail/state.rs +++ /dev/null @@ -1,225 +0,0 @@ -use crate::library::LibraryId; - -use sd_prisma::prisma::location; -use sd_utils::error::FileIOError; - -use std::{ - collections::{hash_map::Entry, HashMap, HashSet, VecDeque}, - ffi::OsString, - path::Path, -}; - -use async_channel as chan; -use futures_concurrency::future::TryJoin; -use serde::{Deserialize, Serialize}; -use tokio::{fs, io}; -use tracing::{error, info, trace}; - -use super::{ - get_shard_hex, old_actor::ActorError, BatchToProcess, ThumbnailKind, EPHEMERAL_DIR, - SAVE_STATE_FILE, -}; - -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct OldThumbsProcessingSaveState { - pub(super) bookkeeper: BookKeeper, - pub(super) ephemeral_file_names: HashSet, - // This queues doubles as LIFO and FIFO, assuming LIFO in case of users asking for a new batch - // by entering a new directory in the explorer, otherwise processing as FIFO - pub(super) queue: VecDeque<(BatchToProcess, ThumbnailKind)>, - // These below are FIFO queues, so we can process leftovers from the previous batch first - pub(super) indexed_leftovers_queue: VecDeque<(BatchToProcess, LibraryId)>, - pub(super) ephemeral_leftovers_queue: VecDeque, -} - -impl Default for OldThumbsProcessingSaveState { - fn default() -> Self { - Self { - bookkeeper: BookKeeper::default(), - ephemeral_file_names: HashSet::with_capacity(128), - queue: VecDeque::with_capacity(32), - indexed_leftovers_queue: VecDeque::with_capacity(8), - 
ephemeral_leftovers_queue: VecDeque::with_capacity(8), - } - } -} - -impl OldThumbsProcessingSaveState { - pub(super) async fn load(thumbnails_directory: impl AsRef) -> Self { - let resume_file = thumbnails_directory.as_ref().join(SAVE_STATE_FILE); - - match fs::read(&resume_file).await { - Ok(bytes) => { - let this = rmp_serde::from_slice::(&bytes).unwrap_or_else(|e| { - error!("Failed to deserialize save state at thumbnailer actor: {e:#?}"); - Self::default() - }); - - if let Err(e) = fs::remove_file(&resume_file).await { - error!( - "Failed to remove save state file at thumbnailer actor: {:#?}", - FileIOError::from((resume_file, e)) - ); - } - - info!( - "Resuming thumbnailer actor state: Existing ephemeral thumbs: {}; \ - Queued batches waiting processing: {}", - this.ephemeral_file_names.len(), - this.queue.len() - + this.indexed_leftovers_queue.len() - + this.ephemeral_leftovers_queue.len() - ); - - this - } - Err(e) if e.kind() == io::ErrorKind::NotFound => { - trace!("No save state found at thumbnailer actor"); - Self::default() - } - Err(e) => { - error!( - "Failed to read save state at thumbnailer actor: {:#?}", - FileIOError::from((resume_file, e)) - ); - Self::default() - } - } - } - - pub(super) async fn store(self, thumbnails_directory: impl AsRef) { - let resume_file = thumbnails_directory.as_ref().join(SAVE_STATE_FILE); - - info!( - "Saving thumbnailer actor state: Existing ephemeral thumbs: {}; \ - Queued batches waiting processing: {}", - self.ephemeral_file_names.len(), - self.queue.len() - + self.indexed_leftovers_queue.len() - + self.ephemeral_leftovers_queue.len() - ); - - let Ok(bytes) = rmp_serde::to_vec_named(&self).map_err(|e| { - error!("Failed to serialize save state at thumbnailer actor: {e:#?}"); - }) else { - return; - }; - - if let Err(e) = fs::write(&resume_file, bytes).await { - error!( - "Failed to write save state at thumbnailer actor: {:#?}", - FileIOError::from((resume_file, e)) - ); - } - } -} - -pub(super) async fn remove_by_cas_ids( - thumbnails_directory: &Path, - cas_ids: Vec, - kind: ThumbnailKind, -) -> Result<(), ActorError> { - let base_dir = match kind { - ThumbnailKind::Ephemeral => thumbnails_directory.join(EPHEMERAL_DIR), - ThumbnailKind::Indexed(library_id) => thumbnails_directory.join(library_id.to_string()), - }; - - cas_ids - .into_iter() - .map(|cas_id| { - let thumbnail_path = base_dir.join(format!("{}/{cas_id}.webp", get_shard_hex(&cas_id))); - - trace!("Removing thumbnail: {}", thumbnail_path.display()); - - async move { - match fs::remove_file(&thumbnail_path).await { - Ok(()) => Ok(()), - Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()), - Err(e) => Err(FileIOError::from((thumbnail_path, e))), - } - } - }) - .collect::>() - .try_join() - .await?; - - Ok(()) -} - -pub(super) type RegisterReporter = (location::id::Type, chan::Sender<(u32, u32)>); - -#[derive(Debug, Serialize, Deserialize)] -pub(super) struct BookKeeper { - work_progress: HashMap, // (pending, total) - - // We can't save reporter function or a channel to disk, the job must ask again to be registered - #[serde(skip, default)] - reporter_by_location: HashMap>, -} -impl Default for BookKeeper { - fn default() -> Self { - Self { - work_progress: HashMap::with_capacity(8), - reporter_by_location: HashMap::with_capacity(8), - } - } -} - -impl BookKeeper { - pub(super) async fn add_work(&mut self, location_id: location::id::Type, thumbs_count: u32) { - let (in_progress, total) = match self.work_progress.entry(location_id) { - Entry::Occupied(mut entry) => { - let 
(in_progress, total) = entry.get_mut(); - - *total += thumbs_count; - - (*in_progress, *total) - } - Entry::Vacant(entry) => { - entry.insert((0, thumbs_count)); - - (0, thumbs_count) - } - }; - - if let Some(progress_tx) = self.reporter_by_location.get(&location_id) { - if progress_tx.send((in_progress, total)).await.is_err() { - error!( - "Failed to send progress update to reporter on location " - ); - } - } - } - - pub(super) fn register_reporter( - &mut self, - location_id: location::id::Type, - reporter_tx: chan::Sender<(u32, u32)>, - ) { - self.reporter_by_location.insert(location_id, reporter_tx); - } - - pub(super) async fn add_progress(&mut self, location_id: location::id::Type, progress: u32) { - if let Some((current_progress, total)) = self.work_progress.get_mut(&location_id) { - *current_progress += progress; - - if *current_progress == *total { - if let Some(progress_tx) = self.reporter_by_location.remove(&location_id) { - if progress_tx.send((*current_progress, *total)).await.is_err() { - error!( - "Failed to send progress update to reporter on location " - ); - } - } - - self.work_progress.remove(&location_id); - } else if let Some(progress_tx) = self.reporter_by_location.get(&location_id) { - if progress_tx.send((*current_progress, *total)).await.is_err() { - error!( - "Failed to send progress update to reporter on location " - ); - } - } - } - } -} diff --git a/core/src/object/media/old_thumbnail/worker.rs b/core/src/object/media/old_thumbnail/worker.rs deleted file mode 100644 index c3696028a060..000000000000 --- a/core/src/object/media/old_thumbnail/worker.rs +++ /dev/null @@ -1,350 +0,0 @@ -use crate::{api::CoreEvent, node::config::NodePreferences}; - -use sd_prisma::prisma::location; - -use std::{collections::HashMap, ffi::OsString, path::PathBuf, pin::pin, sync::Arc}; - -use async_channel as chan; -use futures_concurrency::stream::Merge; -use tokio::{ - spawn, - sync::{broadcast, oneshot, watch}, - time::{interval, interval_at, timeout, Instant, MissedTickBehavior}, -}; -use tokio_stream::{ - wrappers::{IntervalStream, WatchStream}, - StreamExt, -}; -use tracing::{debug, error, trace}; - -use super::{ - clean_up::{process_ephemeral_clean_up, process_indexed_clean_up}, - old_actor::DatabaseMessage, - preferences::ThumbnailerPreferences, - process::{batch_processor, ProcessorControlChannels}, - state::{remove_by_cas_ids, OldThumbsProcessingSaveState, RegisterReporter}, - BatchToProcess, ThumbnailKind, HALF_HOUR, ONE_SEC, THIRTY_SECS, -}; - -#[derive(Debug, Clone)] -pub(super) struct WorkerChannels { - pub(super) progress_management_rx: chan::Receiver, - pub(super) databases_rx: chan::Receiver, - pub(super) cas_ids_to_delete_rx: chan::Receiver<(Vec, ThumbnailKind)>, - pub(super) thumbnails_to_generate_rx: chan::Receiver<(BatchToProcess, ThumbnailKind)>, - pub(super) cancel_rx: chan::Receiver>, -} - -pub(super) async fn old_worker( - available_parallelism: usize, - node_preferences_rx: watch::Receiver, - reporter: broadcast::Sender, - thumbnails_directory: Arc, - WorkerChannels { - progress_management_rx, - databases_rx, - cas_ids_to_delete_rx, - thumbnails_to_generate_rx, - cancel_rx, - }: WorkerChannels, -) { - let mut to_remove_interval = interval_at(Instant::now() + THIRTY_SECS, HALF_HOUR); - to_remove_interval.set_missed_tick_behavior(MissedTickBehavior::Skip); - - let mut idle_interval = interval(ONE_SEC); - idle_interval.set_missed_tick_behavior(MissedTickBehavior::Skip); - - let mut databases = HashMap::new(); - - #[derive(Debug)] - enum StreamMessage { - 
RemovalTick, - ToDelete((Vec, ThumbnailKind)), - Database(DatabaseMessage), - NewBatch((BatchToProcess, ThumbnailKind)), - Leftovers((BatchToProcess, ThumbnailKind)), - NewEphemeralThumbnailsFilenames(Vec), - ProgressManagement(RegisterReporter), - BatchProgress((location::id::Type, u32)), - Shutdown(oneshot::Sender<()>), - UpdatedPreferences(ThumbnailerPreferences), - IdleTick, - } - - let OldThumbsProcessingSaveState { - mut bookkeeper, - mut ephemeral_file_names, - mut queue, - mut indexed_leftovers_queue, - mut ephemeral_leftovers_queue, - } = OldThumbsProcessingSaveState::load(thumbnails_directory.as_ref()).await; - - let (generated_ephemeral_thumbnails_tx, ephemeral_thumbnails_cas_ids_rx) = chan::bounded(32); - let (leftovers_tx, leftovers_rx) = chan::bounded(8); - let (batch_report_progress_tx, batch_report_progress_rx) = chan::bounded(8); - let (stop_older_processing_tx, stop_older_processing_rx) = chan::bounded(1); - - let mut shutdown_leftovers_rx = pin!(leftovers_rx.clone()); - let mut shutdown_batch_report_progress_rx = pin!(batch_report_progress_rx.clone()); - - let mut current_batch_processing_rx: Option> = None; - - let mut msg_stream = pin!(( - IntervalStream::new(to_remove_interval).map(|_| StreamMessage::RemovalTick), - cas_ids_to_delete_rx.map(StreamMessage::ToDelete), - databases_rx.map(StreamMessage::Database), - thumbnails_to_generate_rx.map(StreamMessage::NewBatch), - leftovers_rx.map(StreamMessage::Leftovers), - ephemeral_thumbnails_cas_ids_rx.map(StreamMessage::NewEphemeralThumbnailsFilenames), - progress_management_rx.map(StreamMessage::ProgressManagement), - batch_report_progress_rx.map(StreamMessage::BatchProgress), - cancel_rx.map(StreamMessage::Shutdown), - IntervalStream::new(idle_interval).map(|_| StreamMessage::IdleTick), - WatchStream::new(node_preferences_rx).map(|node_preferences| { - StreamMessage::UpdatedPreferences(node_preferences.thumbnailer) - }), - ) - .merge()); - - let mut thumbnailer_preferences = ThumbnailerPreferences::default(); - - while let Some(msg) = msg_stream.next().await { - match msg { - StreamMessage::IdleTick => { - if let Some(done_rx) = current_batch_processing_rx.as_mut() { - // Checking if the previous run finished or was aborted to clean state - match done_rx.try_recv() { - Ok(()) | Err(oneshot::error::TryRecvError::Closed) => { - current_batch_processing_rx = None; - } - - Err(oneshot::error::TryRecvError::Empty) => { - // The previous run is still running - continue; - } - } - } - - if current_batch_processing_rx.is_none() - && (!queue.is_empty() - || !indexed_leftovers_queue.is_empty() - || !ephemeral_leftovers_queue.is_empty()) - { - let (done_tx, done_rx) = oneshot::channel(); - current_batch_processing_rx = Some(done_rx); - - let batch_and_kind = if let Some(batch_and_kind) = queue.pop_front() { - batch_and_kind - } else if let Some((batch, library_id)) = indexed_leftovers_queue.pop_front() { - // indexed leftovers have bigger priority - (batch, ThumbnailKind::Indexed(library_id)) - } else if let Some(batch) = ephemeral_leftovers_queue.pop_front() { - (batch, ThumbnailKind::Ephemeral) - } else { - continue; - }; - - spawn(batch_processor( - thumbnails_directory.clone(), - batch_and_kind, - generated_ephemeral_thumbnails_tx.clone(), - ProcessorControlChannels { - stop_rx: stop_older_processing_rx.clone(), - done_tx, - batch_report_progress_tx: batch_report_progress_tx.clone(), - }, - leftovers_tx.clone(), - reporter.clone(), - (available_parallelism, thumbnailer_preferences.clone()), - )); - } - } - - 
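// Illustrative aside, not part of the original code: `old_worker` is shaped as a
// single event loop — every input (channels, intervals, watch updates) is mapped
// into the `StreamMessage` enum, the streams are merged with `futures_concurrency`,
// and one `while let` drives all the match arms below. A stripped-down version of
// that shape, with hypothetical message names and the same `tokio_stream` /
// `futures_concurrency` / `pin!` imports used above:
//
//     enum Msg { Tick, Work(String) }
//
//     let ticks = IntervalStream::new(interval(Duration::from_secs(1))).map(|_| Msg::Tick);
//     let work = work_rx.map(Msg::Work); // `work_rx` is an async-channel receiver
//     let mut msgs = pin!((ticks, work).merge());
//
//     while let Some(msg) = msgs.next().await {
//         match msg {
//             Msg::Tick => { /* periodic housekeeping */ }
//             Msg::Work(item) => { /* handle one unit of work */ }
//         }
//     }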
StreamMessage::RemovalTick => { - // For any of them we process a clean up if a time since the last one already passed - if !databases.is_empty() { - spawn(process_indexed_clean_up( - thumbnails_directory.clone(), - databases - .iter() - .map(|(id, db)| (*id, Arc::clone(db))) - .collect::>(), - )); - } - - if !ephemeral_file_names.is_empty() { - spawn(process_ephemeral_clean_up( - thumbnails_directory.clone(), - ephemeral_file_names.clone(), - )); - } - } - - StreamMessage::ToDelete((cas_ids, kind)) => { - if !cas_ids.is_empty() { - if let Err(e) = remove_by_cas_ids(&thumbnails_directory, cas_ids, kind).await { - error!("Got an error when trying to remove thumbnails: {e:#?}"); - } - } - } - - StreamMessage::NewBatch((batch, kind)) => { - let in_background = batch.in_background; - - if let Some(location_id) = batch.location_id { - bookkeeper - .add_work(location_id, batch.batch.len() as u32) - .await; - } - - trace!( - "New {kind:?} batch to process in {}, size: {}", - if in_background { - "background" - } else { - "foreground" - }, - batch.batch.len() - ); - - if in_background { - queue.push_back((batch, kind)); - } else { - // If a processing must be in foreground, then it takes maximum priority - queue.push_front((batch, kind)); - } - - // Only sends stop signal if there is a batch being processed - if !in_background { - stop_batch( - ¤t_batch_processing_rx, - &stop_older_processing_tx, - &stop_older_processing_rx, - ) - .await; - } - } - - StreamMessage::Leftovers((batch, ThumbnailKind::Indexed(library_id))) => { - indexed_leftovers_queue.push_back((batch, library_id)) - } - - StreamMessage::Leftovers((batch, ThumbnailKind::Ephemeral)) => { - ephemeral_leftovers_queue.push_back(batch) - } - - StreamMessage::Database(DatabaseMessage::Add(id, db)) - | StreamMessage::Database(DatabaseMessage::Update(id, db)) => { - databases.insert(id, db); - } - - StreamMessage::Database(DatabaseMessage::Remove(id)) => { - databases.remove(&id); - } - - StreamMessage::NewEphemeralThumbnailsFilenames(new_ephemeral_thumbs) => { - trace!("New ephemeral thumbnails: {}", new_ephemeral_thumbs.len()); - ephemeral_file_names.extend(new_ephemeral_thumbs); - } - - StreamMessage::BatchProgress((location_id, progressed)) => { - bookkeeper.add_progress(location_id, progressed).await; - } - - StreamMessage::Shutdown(cancel_tx) => { - debug!("Thumbnail actor is shutting down..."); - let start = Instant::now(); - - stop_batch( - ¤t_batch_processing_rx, - &stop_older_processing_tx, - &stop_older_processing_rx, - ) - .await; - - // Closing the leftovers channel to stop the batch processor as we already sent - // an stop signal - leftovers_tx.close(); - while let Some((batch, kind)) = shutdown_leftovers_rx.next().await { - match kind { - ThumbnailKind::Indexed(library_id) => { - indexed_leftovers_queue.push_back((batch, library_id)) - } - ThumbnailKind::Ephemeral => ephemeral_leftovers_queue.push_back(batch), - } - } - - // Consuming the last progress reports to keep everything up to date - shutdown_batch_report_progress_rx.close(); - while let Some((location_id, progressed)) = - shutdown_batch_report_progress_rx.next().await - { - bookkeeper.add_progress(location_id, progressed).await; - } - - // Saving state - OldThumbsProcessingSaveState { - bookkeeper, - ephemeral_file_names, - queue, - indexed_leftovers_queue, - ephemeral_leftovers_queue, - } - .store(thumbnails_directory.as_ref()) - .await; - - // Signaling that we're done shutting down - cancel_tx.send(()).ok(); - - debug!("Thumbnailer has been shutdown in {:?}", 
start.elapsed()); - return; - } - - StreamMessage::ProgressManagement((location_id, progress_tx)) => { - bookkeeper.register_reporter(location_id, progress_tx); - } - - StreamMessage::UpdatedPreferences(preferences) => { - thumbnailer_preferences = preferences; - stop_batch( - ¤t_batch_processing_rx, - &stop_older_processing_tx, - &stop_older_processing_rx, - ) - .await; - } - } - } -} - -#[inline] -async fn stop_batch( - current_batch_processing_rx: &Option>, - stop_older_processing_tx: &chan::Sender>, - stop_older_processing_rx: &chan::Receiver>, -) { - // First stopping the current batch processing - if current_batch_processing_rx.is_some() { - trace!("Sending stop signal to older processing"); - - let (tx, rx) = oneshot::channel(); - - match stop_older_processing_tx.try_send(tx) { - Ok(()) => { - // We put a timeout here to avoid a deadlock in case the older processing already - // finished its batch - if timeout(ONE_SEC, rx).await.is_err() { - stop_older_processing_rx.recv().await.ok(); - } - } - Err(e) if e.is_full() => { - // The last signal we sent happened after a batch was already processed - // So we clean the channel and we're good to go. - stop_older_processing_rx.recv().await.ok(); - } - Err(_) => { - error!("Thumbnail actor died when trying to stop older processing"); - } - } - } -} diff --git a/core/src/object/mod.rs b/core/src/object/mod.rs index f71961a75da9..e4de76f43d93 100644 --- a/core/src/object/mod.rs +++ b/core/src/object/mod.rs @@ -1,29 +1,3 @@ -use sd_prisma::prisma::{file_path, object}; - -use serde::{Deserialize, Serialize}; -use specta::Type; - -pub mod cas; pub mod fs; -pub mod media; -// pub mod old_file_identifier; -// pub mod old_orphan_remover; pub mod tag; pub mod validation; - -// Objects are primarily created by the identifier from Paths -// Some Objects are purely virtual, unless they have one or more associated Paths, which refer to a file found in a Location -// Objects are what can be added to Spaces - -// The response to provide the Explorer when looking at Objects -#[derive(Debug, Serialize, Deserialize, Type)] -pub struct ObjectsForExplorer { - pub objects: Vec, - // pub context: ExplorerContext, -} - -#[derive(Debug, Serialize, Deserialize, Type)] -pub enum ObjectData { - Object(Box), - Path(Box), -} diff --git a/core/src/object/old_file_identifier/mod.rs b/core/src/object/old_file_identifier/mod.rs deleted file mode 100644 index cb94810827e0..000000000000 --- a/core/src/object/old_file_identifier/mod.rs +++ /dev/null @@ -1,404 +0,0 @@ -use crate::{library::Library, object::cas::generate_cas_id, old_job::JobError}; - -use sd_core_file_path_helper::{FilePathError, IsolatedFilePathData}; -use sd_core_prisma_helpers::{file_path_for_file_identifier, object_for_file_identifier}; - -use sd_file_ext::{extensions::Extension, kind::ObjectKind}; -use sd_prisma::{ - prisma::{file_path, location, object, PrismaClient}, - prisma_sync, -}; -use sd_sync::{CRDTOperation, OperationFactory}; -use sd_utils::{db::maybe_missing, error::FileIOError, msgpack, uuid_to_bytes}; - -use std::{ - collections::{HashMap, HashSet}, - fmt::Debug, - path::Path, -}; - -use futures::future::join_all; -use tokio::fs; -use tracing::{error, trace}; -use uuid::Uuid; - -// pub mod old_file_identifier_job; -// mod shallow; - -// pub use shallow::*; - -// we break these jobs into chunks of 100 to improve performance -const CHUNK_SIZE: usize = 100; - -#[derive(thiserror::Error, Debug)] -pub enum FileIdentifierJobError { - #[error("received sub path not in database: ", .0.display())] - 
SubPathNotFound(Box), - - // Internal Errors - #[error(transparent)] - FilePathError(#[from] FilePathError), - #[error("database error: {0}")] - Database(#[from] prisma_client_rust::QueryError), -} - -#[derive(Debug, Clone)] -pub struct FileMetadata { - pub cas_id: Option, - pub kind: ObjectKind, - pub fs_metadata: std::fs::Metadata, -} - -impl FileMetadata { - /// Assembles `create_unchecked` params for a given file path - pub async fn new( - location_path: impl AsRef, - iso_file_path: &IsolatedFilePathData<'_>, // TODO: use dedicated CreateUnchecked type - ) -> Result { - let path = location_path.as_ref().join(iso_file_path); - - let fs_metadata = fs::metadata(&path) - .await - .map_err(|e| FileIOError::from((&path, e)))?; - - assert!( - !fs_metadata.is_dir(), - "We can't generate cas_id for directories" - ); - - // derive Object kind - let kind = Extension::resolve_conflicting(&path, false) - .await - .map(Into::into) - .unwrap_or(ObjectKind::Unknown); - - let cas_id = if fs_metadata.len() != 0 { - generate_cas_id(&path, fs_metadata.len()) - .await - .map(Some) - .map_err(|e| FileIOError::from((&path, e)))? - } else { - // We can't do shit with empty files - None - }; - - trace!("Analyzed file: {path:?} {cas_id:?} {kind:?}"); - - Ok(FileMetadata { - cas_id, - kind, - fs_metadata, - }) - } -} - -async fn identifier_job_step( - Library { db, sync, .. }: &Library, - location: &location::Data, - file_paths: &[file_path_for_file_identifier::Data], -) -> Result<(usize, usize), JobError> { - let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?; - - let file_paths_metadatas = join_all( - file_paths - .iter() - .filter_map(|file_path| { - IsolatedFilePathData::try_from((location.id, file_path)) - .map(|iso_file_path| (iso_file_path, file_path)) - .map_err(|e| error!("Failed to extract isolated file path data: {e:#?}")) - .ok() - }) - .map(|(iso_file_path, file_path)| async move { - FileMetadata::new(&location_path, &iso_file_path) - .await - .map(|metadata| { - ( - // SAFETY: This should never happen - Uuid::from_slice(&file_path.pub_id) - .expect("file_path.pub_id is invalid!"), - (metadata, file_path), - ) - }) - .map_err(|e| { - #[cfg(target_os = "windows")] - { - // Handle case where file is on-demand (NTFS only) - if e.source.raw_os_error().map_or(false, |code| code == 362) { - error!("Failed to extract metadata from on-demand file: {e:#?}"); - } else { - error!("Failed to extract file metadata: {e:#?}") - } - } - - #[cfg(not(target_os = "windows"))] - { - error!("Failed to extract file metadata: {e:#?}"); - } - }) - .ok() - }), - ) - .await - .into_iter() - .flatten() - .collect::>(); - - let unique_cas_ids = file_paths_metadatas - .values() - .filter_map(|(metadata, _)| metadata.cas_id.clone()) - .collect::>() - .into_iter() - .collect(); - - // Assign cas_id to each file path - sync.write_ops( - db, - file_paths_metadatas - .iter() - .map(|(pub_id, (metadata, _))| { - ( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: sd_utils::uuid_to_bytes(*pub_id), - }, - file_path::cas_id::NAME, - msgpack!(&metadata.cas_id), - ), - db.file_path().update( - file_path::pub_id::equals(sd_utils::uuid_to_bytes(*pub_id)), - vec![file_path::cas_id::set(metadata.cas_id.clone())], - ), - ) - }) - .unzip::<_, _, _, Vec<_>>(), - ) - .await?; - - // Retrieves objects that are already connected to file paths with the same id - let existing_objects = db - .object() - .find_many(vec![object::file_paths::some(vec![ - file_path::cas_id::in_vec(unique_cas_ids), - ])]) - 
.select(object_for_file_identifier::select()) - .exec() - .await?; - - let existing_object_cas_ids = existing_objects - .iter() - .flat_map(|object| { - object - .file_paths - .iter() - .filter_map(|file_path| file_path.cas_id.as_ref()) - }) - .collect::>(); - - // Attempt to associate each file path with an object that has been - // connected to file paths with the same cas_id - let updated_file_paths = sync - .write_ops( - db, - file_paths_metadatas - .iter() - .filter_map(|(pub_id, (metadata, file_path))| { - // Filtering out files without cas_id due to being empty - metadata - .cas_id - .is_some() - .then_some((pub_id, (metadata, file_path))) - }) - .flat_map(|(pub_id, (metadata, _))| { - existing_objects - .iter() - .find(|object| { - object - .file_paths - .iter() - .any(|file_path| file_path.cas_id == metadata.cas_id) - }) - .map(|object| (*pub_id, object)) - }) - .map(|(pub_id, object)| { - let (crdt_op, db_op) = connect_file_path_to_object( - pub_id, - // SAFETY: This pub_id is generated by the uuid lib, but we have to store bytes in sqlite - Uuid::from_slice(&object.pub_id).expect("uuid bytes are invalid"), - sync, - db, - ); - - (crdt_op, db_op.select(file_path::select!({ pub_id }))) - }) - .unzip::<_, _, Vec<_>, Vec<_>>(), - ) - .await?; - - trace!( - "Found {} existing Objects in Library, linking file paths...", - existing_objects.len() - ); - - // extract objects that don't already exist in the database - let file_paths_requiring_new_object = file_paths_metadatas - .into_iter() - .filter(|(_, (FileMetadata { cas_id, .. }, _))| { - cas_id - .as_ref() - .map(|cas_id| !existing_object_cas_ids.contains(cas_id)) - .unwrap_or(true) - }) - .collect::>(); - - let total_created = if !file_paths_requiring_new_object.is_empty() { - trace!( - "Creating {} new Objects in Library", - file_paths_requiring_new_object.len(), - ); - - let (object_create_args, file_path_update_args): (Vec<_>, Vec<_>) = - file_paths_requiring_new_object - .iter() - .map( - |( - file_path_pub_id, - ( - FileMetadata { kind, .. }, - file_path_for_file_identifier::Data { date_created, .. 
}, - ), - )| { - let object_pub_id = Uuid::new_v4(); - let sync_id = || prisma_sync::object::SyncId { - pub_id: sd_utils::uuid_to_bytes(object_pub_id), - }; - - let kind = *kind as i32; - - let (sync_params, db_params): (Vec<_>, Vec<_>) = [ - ( - (object::date_created::NAME, msgpack!(date_created)), - object::date_created::set(*date_created), - ), - ( - (object::kind::NAME, msgpack!(kind)), - object::kind::set(Some(kind)), - ), - ] - .into_iter() - .unzip(); - - ( - ( - sync.shared_create(sync_id(), sync_params), - object::create_unchecked(uuid_to_bytes(object_pub_id), db_params), - ), - { - let (crdt_op, db_op) = connect_file_path_to_object( - *file_path_pub_id, - object_pub_id, - sync, - db, - ); - - (crdt_op, db_op.select(file_path::select!({ pub_id }))) - }, - ) - }, - ) - .unzip(); - - // create new object records with assembled values - let total_created_files = sync - .write_ops(db, { - let (sync, db_params): (Vec<_>, Vec<_>) = object_create_args.into_iter().unzip(); - - ( - sync.into_iter().flatten().collect(), - db.object().create_many(db_params), - ) - }) - .await - .unwrap_or_else(|e| { - error!("Error inserting files: {:#?}", e); - 0 - }); - - trace!("Created {} new Objects in Library", total_created_files); - - if total_created_files > 0 { - trace!("Updating file paths with created objects"); - - sync.write_ops(db, { - let data: (Vec<_>, Vec<_>) = file_path_update_args.into_iter().unzip(); - - data - }) - .await?; - - trace!("Updated file paths with created objects"); - } - - total_created_files as usize - } else { - 0 - }; - - Ok((total_created, updated_file_paths.len())) -} - -fn connect_file_path_to_object<'db>( - file_path_id: Uuid, - object_id: Uuid, - sync: &crate::sync::Manager, - db: &'db PrismaClient, -) -> (CRDTOperation, file_path::UpdateQuery<'db>) { - #[cfg(debug_assertions)] - trace!("Connecting to "); - - let vec_id = object_id.as_bytes().to_vec(); - - ( - sync.shared_update( - prisma_sync::file_path::SyncId { - pub_id: sd_utils::uuid_to_bytes(file_path_id), - }, - file_path::object::NAME, - msgpack!(prisma_sync::object::SyncId { - pub_id: vec_id.clone() - }), - ), - db.file_path().update( - file_path::pub_id::equals(sd_utils::uuid_to_bytes(file_path_id)), - vec![file_path::object::connect(object::pub_id::equals(vec_id))], - ), - ) -} - -async fn process_identifier_file_paths( - location: &location::Data, - file_paths: &[file_path_for_file_identifier::Data], - step_number: usize, - cursor: file_path::id::Type, - library: &Library, - orphan_count: usize, -) -> Result<(usize, usize, file_path::id::Type), JobError> { - trace!( - "Processing {:?} orphan Paths. 
({} completed of {})", - file_paths.len(), - step_number, - orphan_count - ); - - let (total_objects_created, total_objects_linked) = - identifier_job_step(library, location, file_paths).await?; - - Ok(( - total_objects_created, - total_objects_linked, - // returns a new cursor to the last row of this chunk or the current one - file_paths - .last() - .map(|last_row| last_row.id) - .unwrap_or(cursor), - )) -} diff --git a/core/src/object/old_file_identifier/old_file_identifier_job.rs b/core/src/object/old_file_identifier/old_file_identifier_job.rs deleted file mode 100644 index 69494b3fd4c1..000000000000 --- a/core/src/object/old_file_identifier/old_file_identifier_job.rs +++ /dev/null @@ -1,339 +0,0 @@ -use crate::{ - api::CoreEvent, - library::Library, - location::ScanState, - old_job::{ - CurrentStep, JobError, JobInitOutput, JobReportUpdate, JobResult, JobRunMetadata, - JobStepOutput, StatefulJob, WorkerContext, - }, -}; - -use sd_core_file_path_helper::{ - ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - IsolatedFilePathData, -}; -use sd_core_prisma_helpers::file_path_for_file_identifier; - -use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder}; -use sd_utils::db::maybe_missing; - -use std::{ - hash::{Hash, Hasher}, - path::{Path, PathBuf}, -}; - -use prisma_client_rust::or; -use serde::{Deserialize, Serialize}; -use serde_json::json; -use tracing::{debug, info, trace}; - -use super::{process_identifier_file_paths, FileIdentifierJobError, CHUNK_SIZE}; - -/// `FileIdentifierJobInit` takes file_paths without an object_id from a location -/// or starting from a `sub_path` getting every descendent from this `sub_path` -/// and uniquely identifies them: -/// - first: generating the cas_id and extracting metadata -/// - finally: creating unique object records, and linking them to their file_paths -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct OldFileIdentifierJobInit { - pub location: location::Data, - pub sub_path: Option, // subpath to start from -} - -impl Hash for OldFileIdentifierJobInit { - fn hash(&self, state: &mut H) { - self.location.id.hash(state); - if let Some(ref sub_path) = self.sub_path { - sub_path.hash(state); - } - } -} - -#[derive(Serialize, Deserialize, Debug)] -pub struct OldFileIdentifierJobData { - location_path: PathBuf, - maybe_sub_iso_file_path: Option>, -} - -#[derive(Serialize, Deserialize, Default, Debug)] -pub struct OldFileIdentifierJobRunMetadata { - cursor: file_path::id::Type, - total_orphan_paths: usize, - total_objects_created: usize, - total_objects_linked: usize, - total_objects_ignored: usize, -} - -impl JobRunMetadata for OldFileIdentifierJobRunMetadata { - fn update(&mut self, new_data: Self) { - self.total_orphan_paths += new_data.total_orphan_paths; - self.total_objects_created += new_data.total_objects_created; - self.total_objects_linked += new_data.total_objects_linked; - self.total_objects_ignored += new_data.total_objects_ignored; - self.cursor = new_data.cursor; - } -} - -#[async_trait::async_trait] -impl StatefulJob for OldFileIdentifierJobInit { - type Data = OldFileIdentifierJobData; - type Step = (); - type RunMetadata = OldFileIdentifierJobRunMetadata; - - const NAME: &'static str = "file_identifier"; - const IS_BATCHED: bool = true; - - fn target_location(&self) -> location::id::Type { - self.location.id - } - - async fn init( - &self, - ctx: &WorkerContext, - data: &mut Option, - ) -> Result, JobError> { - let init = self; - let Library { db, .. 
} = &*ctx.library; - - debug!("Identifying orphan File Paths..."); - - let location_id = init.location.id; - - let location_path = maybe_missing(&init.location.path, "location.path").map(Path::new)?; - - let maybe_sub_iso_file_path = match &init.sub_path { - Some(sub_path) if sub_path != Path::new("") => { - let full_path = ensure_sub_path_is_in_location(location_path, sub_path) - .await - .map_err(FileIdentifierJobError::from)?; - ensure_sub_path_is_directory(location_path, sub_path) - .await - .map_err(FileIdentifierJobError::from)?; - - let sub_iso_file_path = - IsolatedFilePathData::new(location_id, location_path, &full_path, true) - .map_err(FileIdentifierJobError::from)?; - - ensure_file_path_exists( - sub_path, - &sub_iso_file_path, - db, - FileIdentifierJobError::SubPathNotFound, - ) - .await?; - - Some(sub_iso_file_path) - } - _ => None, - }; - - let orphan_count = - count_orphan_file_paths(db, location_id, &maybe_sub_iso_file_path).await?; - - // Initializing `state.data` here because we need a complete state in case of early finish - *data = Some(OldFileIdentifierJobData { - location_path: location_path.to_path_buf(), - maybe_sub_iso_file_path, - }); - - let data = data.as_ref().expect("we just set it"); - - if orphan_count == 0 { - return Err(JobError::EarlyFinish { - name: ::NAME.to_string(), - reason: "Found no orphan file paths to process".to_string(), - }); - } - - debug!("Found {} orphan file paths", orphan_count); - - let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize; - debug!( - "Found {} orphan Paths. Will execute {} tasks...", - orphan_count, task_count - ); - - let first_path = db - .file_path() - .find_first(orphan_path_filters( - location_id, - None, - &data.maybe_sub_iso_file_path, - )) - .select(file_path::select!({ id })) - .exec() - .await? - .expect("We already validated before that there are orphans `file_path`s"); - - ctx.progress(vec![ - JobReportUpdate::TaskCount(orphan_count), - JobReportUpdate::Message(format!("Found {orphan_count} files to be identified")), - ]); - - Ok(( - OldFileIdentifierJobRunMetadata { - total_orphan_paths: orphan_count, - cursor: first_path.id, - ..Default::default() - }, - vec![(); task_count], - ) - .into()) - } - - async fn execute_step( - &self, - ctx: &WorkerContext, - CurrentStep { step_number, .. 
}: CurrentStep<'_, Self::Step>, - data: &Self::Data, - run_metadata: &Self::RunMetadata, - ) -> Result, JobError> { - let init = self; - let location = &init.location; - - let mut new_metadata = Self::RunMetadata::default(); - - // get chunk of orphans to process - let file_paths = get_orphan_file_paths( - &ctx.library.db, - location.id, - run_metadata.cursor, - &data.maybe_sub_iso_file_path, - ) - .await?; - - // if no file paths found, abort entire job early, there is nothing to do - // if we hit this error, there is something wrong with the data/query - if file_paths.is_empty() { - return Err(JobError::EarlyFinish { - name: ::NAME.to_string(), - reason: "Expected orphan Paths not returned from database query for this chunk" - .to_string(), - }); - } - - let (total_objects_created, total_objects_linked, new_cursor) = - process_identifier_file_paths( - location, - &file_paths, - step_number, - run_metadata.cursor, - &ctx.library, - run_metadata.total_orphan_paths, - ) - .await?; - - new_metadata.total_objects_created = total_objects_created; - new_metadata.total_objects_linked = total_objects_linked; - new_metadata.cursor = new_cursor; - - // send an array of ids to let clients know new objects were identified - ctx.node.emit(CoreEvent::NewIdentifiedObjects { - file_path_ids: file_paths.iter().map(|fp| fp.id).collect(), - }); - - ctx.progress(vec![ - JobReportUpdate::CompletedTaskCount(step_number * CHUNK_SIZE + file_paths.len()), - JobReportUpdate::Message(format!( - "Processed {} of {} orphan Paths", - step_number * CHUNK_SIZE, - run_metadata.total_orphan_paths - )), - ]); - - Ok(new_metadata.into()) - } - - async fn finalize( - &self, - ctx: &WorkerContext, - _data: &Option, - run_metadata: &Self::RunMetadata, - ) -> JobResult { - let init = self; - info!("Finalizing identifier job: {:?}", &run_metadata); - - ctx.library - .db - .location() - .update( - location::id::equals(init.location.id), - vec![location::scan_state::set(ScanState::FilesIdentified as i32)], - ) - .exec() - .await - .map_err(FileIdentifierJobError::from)?; - - Ok(Some(json!({"init: ": init, "run_metadata": run_metadata}))) - } -} - -fn orphan_path_filters( - location_id: location::id::Type, - file_path_id: Option, - maybe_sub_iso_file_path: &Option>, -) -> Vec { - sd_utils::chain_optional_iter( - [ - or!( - file_path::object_id::equals(None), - file_path::cas_id::equals(None) - ), - file_path::is_dir::equals(Some(false)), - file_path::location_id::equals(Some(location_id)), - file_path::size_in_bytes_bytes::not(Some(0u64.to_be_bytes().to_vec())), - ], - [ - // this is a workaround for the cursor not working properly - file_path_id.map(file_path::id::gte), - maybe_sub_iso_file_path.as_ref().map(|sub_iso_file_path| { - file_path::materialized_path::starts_with( - sub_iso_file_path - .materialized_path_for_children() - .expect("sub path iso_file_path must be a directory"), - ) - }), - ], - ) -} - -async fn count_orphan_file_paths( - db: &PrismaClient, - location_id: location::id::Type, - maybe_sub_materialized_path: &Option>, -) -> Result { - db.file_path() - .count(orphan_path_filters( - location_id, - None, - maybe_sub_materialized_path, - )) - .exec() - .await - .map(|c| c as usize) -} - -async fn get_orphan_file_paths( - db: &PrismaClient, - location_id: location::id::Type, - file_path_id: file_path::id::Type, - maybe_sub_materialized_path: &Option>, -) -> Result, prisma_client_rust::QueryError> { - trace!( - "Querying {} orphan Paths at cursor: {:?}", - CHUNK_SIZE, - file_path_id - ); - db.file_path() - 
.find_many(orphan_path_filters( - location_id, - Some(file_path_id), - maybe_sub_materialized_path, - )) - .order_by(file_path::id::order(SortOrder::Asc)) - .take(CHUNK_SIZE as i64) - // .skip(1) - .select(file_path_for_file_identifier::select()) - .exec() - .await -} diff --git a/core/src/object/old_file_identifier/shallow.rs b/core/src/object/old_file_identifier/shallow.rs deleted file mode 100644 index 04355be15f4d..000000000000 --- a/core/src/object/old_file_identifier/shallow.rs +++ /dev/null @@ -1,182 +0,0 @@ -use crate::{invalidate_query, library::Library, old_job::JobError}; - -use sd_core_file_path_helper::{ - ensure_file_path_exists, ensure_sub_path_is_directory, ensure_sub_path_is_in_location, - IsolatedFilePathData, -}; -use sd_core_prisma_helpers::file_path_for_file_identifier; - -use sd_prisma::prisma::{file_path, location, PrismaClient, SortOrder}; -use sd_utils::db::maybe_missing; - -use std::path::{Path, PathBuf}; - -use prisma_client_rust::or; -use serde::{Deserialize, Serialize}; -use tracing::{trace, warn}; - -use super::{process_identifier_file_paths, FileIdentifierJobError, CHUNK_SIZE}; - -#[derive(Serialize, Deserialize)] -pub struct ShallowFileIdentifierJobState { - cursor: file_path::id::Type, - sub_iso_file_path: IsolatedFilePathData<'static>, -} - -pub async fn old_shallow( - location: &location::Data, - sub_path: &PathBuf, - library: &Library, -) -> Result<(), JobError> { - let Library { db, .. } = library; - - warn!("Identifying orphan File Paths..."); - - let location_id = location.id; - let location_path = maybe_missing(&location.path, "location.path").map(Path::new)?; - - let sub_iso_file_path = if sub_path != Path::new("") { - let full_path = ensure_sub_path_is_in_location(location_path, &sub_path) - .await - .map_err(FileIdentifierJobError::from)?; - ensure_sub_path_is_directory(location_path, &sub_path) - .await - .map_err(FileIdentifierJobError::from)?; - - let sub_iso_file_path = - IsolatedFilePathData::new(location_id, location_path, &full_path, true) - .map_err(FileIdentifierJobError::from)?; - - ensure_file_path_exists( - &sub_path, - &sub_iso_file_path, - db, - FileIdentifierJobError::SubPathNotFound, - ) - .await?; - - sub_iso_file_path - } else { - IsolatedFilePathData::new(location_id, location_path, location_path, true) - .map_err(FileIdentifierJobError::from)? - }; - - let orphan_count = count_orphan_file_paths(db, location_id, &sub_iso_file_path).await?; - - if orphan_count == 0 { - return Ok(()); - } - - let task_count = (orphan_count as f64 / CHUNK_SIZE as f64).ceil() as usize; - warn!( - "Found {} orphan Paths. Will execute {} tasks...", - orphan_count, task_count - ); - - let Some(first_path) = db - .file_path() - .find_first(orphan_path_filters(location_id, None, &sub_iso_file_path)) - // .order_by(file_path::id::order(Direction::Asc)) - .select(file_path::select!({ id })) - .exec() - .await? 
- else { - warn!("No orphan Paths found due to another Job finishing first"); - return Ok(()); - }; - - // Initializing `state.data` here because we need a complete state in case of early finish - let mut data = ShallowFileIdentifierJobState { - cursor: first_path.id, - sub_iso_file_path, - }; - - for step_number in 0..task_count { - let ShallowFileIdentifierJobState { - cursor, - sub_iso_file_path, - } = &mut data; - - // get chunk of orphans to process - let file_paths = - get_orphan_file_paths(&library.db, location.id, *cursor, sub_iso_file_path).await?; - - let (_, _, new_cursor) = process_identifier_file_paths( - location, - &file_paths, - step_number, - *cursor, - library, - orphan_count, - ) - .await?; - *cursor = new_cursor; - } - - invalidate_query!(library, "search.paths"); - invalidate_query!(library, "search.objects"); - - Ok(()) -} - -fn orphan_path_filters( - location_id: location::id::Type, - file_path_id: Option, - sub_iso_file_path: &IsolatedFilePathData<'_>, -) -> Vec { - sd_utils::chain_optional_iter( - [ - or!( - file_path::object_id::equals(None), - file_path::cas_id::equals(None) - ), - file_path::is_dir::equals(Some(false)), - file_path::location_id::equals(Some(location_id)), - file_path::materialized_path::equals(Some( - sub_iso_file_path - .materialized_path_for_children() - .expect("sub path for shallow identifier must be a directory"), - )), - file_path::size_in_bytes_bytes::not(Some(0u64.to_be_bytes().to_vec())), - ], - [file_path_id.map(file_path::id::gte)], - ) -} - -async fn count_orphan_file_paths( - db: &PrismaClient, - location_id: location::id::Type, - sub_iso_file_path: &IsolatedFilePathData<'_>, -) -> Result { - db.file_path() - .count(orphan_path_filters(location_id, None, sub_iso_file_path)) - .exec() - .await - .map(|c| c as usize) -} - -async fn get_orphan_file_paths( - db: &PrismaClient, - location_id: location::id::Type, - file_path_id_cursor: file_path::id::Type, - sub_iso_file_path: &IsolatedFilePathData<'_>, -) -> Result, prisma_client_rust::QueryError> { - trace!( - "Querying {} orphan Paths at cursor: {:?}", - CHUNK_SIZE, - file_path_id_cursor - ); - db.file_path() - .find_many(orphan_path_filters( - location_id, - Some(file_path_id_cursor), - sub_iso_file_path, - )) - .order_by(file_path::id::order(SortOrder::Asc)) - // .cursor(cursor.into()) - .take(CHUNK_SIZE as i64) - // .skip(1) - .select(file_path_for_file_identifier::select()) - .exec() - .await -} diff --git a/core/src/object/old_orphan_remover.rs b/core/src/object/old_orphan_remover.rs index 35b3168423f1..68882a9ad9ee 100644 --- a/core/src/object/old_orphan_remover.rs +++ b/core/src/object/old_orphan_remover.rs @@ -9,6 +9,8 @@ use tokio::{ }; use tracing::{error, trace}; +// TODO(fogodev): To be rewritten using new task system + const TEN_SECONDS: Duration = Duration::from_secs(10); const ONE_MINUTE: Duration = Duration::from_secs(60); diff --git a/core/src/object/tag/mod.rs b/core/src/object/tag/mod.rs index c6a228c1e4a6..82b82c3d7ef2 100644 --- a/core/src/object/tag/mod.rs +++ b/core/src/object/tag/mod.rs @@ -4,8 +4,6 @@ use sd_prisma::{prisma::tag, prisma_sync}; use sd_sync::*; use chrono::{DateTime, FixedOffset, Utc}; - -use sd_utils::msgpack; use serde::Deserialize; use specta::Type; use uuid::Uuid; diff --git a/core/src/old_job/error.rs b/core/src/old_job/error.rs index e6fcaaf26bda..e5315916c1f9 100644 --- a/core/src/old_job/error.rs +++ b/core/src/old_job/error.rs @@ -56,12 +56,6 @@ pub enum JobError { Critical(&'static str), // Specific job errors - // #[error(transparent)] 
- // Indexer(#[from] IndexerError), - // #[error(transparent)] - // MediaProcessor(#[from] MediaProcessorError), - // #[error(transparent)] - // FileIdentifier(#[from] FileIdentifierJobError), #[error(transparent)] Validator(#[from] ValidatorError), #[error(transparent)] diff --git a/core/src/old_job/manager.rs b/core/src/old_job/manager.rs index f27edea2c4a2..586811aee6d3 100644 --- a/core/src/old_job/manager.rs +++ b/core/src/old_job/manager.rs @@ -1,13 +1,10 @@ use crate::{ library::Library, - // location::indexer::old_indexer_job::OldIndexerJobInit, object::{ fs::{ old_copy::OldFileCopierJobInit, old_cut::OldFileCutterJobInit, old_delete::OldFileDeleterJobInit, old_erase::OldFileEraserJobInit, }, - // media::old_media_processor::OldMediaProcessorJobInit, - // old_file_identifier::old_file_identifier_job::OldFileIdentifierJobInit, validation::old_validator_job::OldObjectValidatorJobInit, }, old_job::{worker::Worker, DynJob, JobError, OldJob}, @@ -66,9 +63,8 @@ impl Actor { } } -/// JobManager handles queueing and executing jobs using the `DynJob` -/// Handling persisting JobReports to the database, pause/resuming, and -/// +/// JobManager handles queueing and executing jobs using the [`DynJob`] +/// Handling persisting JobReports to the database, pause/resuming pub struct OldJobs { current_jobs_hashes: RwLock>, job_queue: RwLock>>, @@ -396,9 +392,6 @@ fn initialize_resumable_job( Err(JobError::UnknownJobName(job_report.id, job_report.name)) }, jobs = [ - // OldMediaProcessorJobInit, - // OldIndexerJobInit, - // OldFileIdentifierJobInit, OldObjectValidatorJobInit, OldFileCutterJobInit, OldFileCopierJobInit, diff --git a/core/src/old_job/mod.rs b/core/src/old_job/mod.rs index 9bb7d86782e8..d9d8a525712f 100644 --- a/core/src/old_job/mod.rs +++ b/core/src/old_job/mod.rs @@ -149,20 +149,23 @@ pub trait DynJob: Send + Sync { async fn cancel_children(&mut self, library: &Library) -> Result<(), JobError>; } -pub struct JobBuilder { +pub struct OldJob { id: Uuid, - init: SJob, - report_builder: JobReportBuilder, + hash: u64, + report: Option, + state: Option>, + next_jobs: VecDeque>, } -impl JobBuilder { - pub fn build(self) -> Box> { +impl OldJob { + pub fn new(init: SJob) -> Box { + let id = Uuid::new_v4(); Box::new(OldJob:: { - id: self.id, - hash: ::hash(&self.init), - report: Some(self.report_builder.build()), + id, + hash: ::hash(&init), + report: Some(JobReportBuilder::new(id, SJob::NAME.to_string()).build()), state: Some(JobState { - init: self.init, + init, data: None, steps: VecDeque::new(), step_number: 0, @@ -172,64 +175,6 @@ impl JobBuilder { }) } - pub fn new(init: SJob) -> Self { - let id = Uuid::new_v4(); - Self { - id, - init, - report_builder: JobReportBuilder::new(id, SJob::NAME.to_string()), - } - } - - pub fn with_action(mut self, action: impl AsRef) -> Self { - self.report_builder = self.report_builder.with_action(action); - self - } - - pub fn with_parent_id(mut self, parent_id: Uuid) -> Self { - self.report_builder = self.report_builder.with_parent_id(parent_id); - self - } - - pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { - self.report_builder = self.report_builder.with_metadata(metadata); - self - } -} - -pub struct OldJob { - id: Uuid, - hash: u64, - report: Option, - state: Option>, - next_jobs: VecDeque>, -} - -impl OldJob { - pub fn new(init: SJob) -> Box { - JobBuilder::new(init).build() - } - - pub fn queue_next(mut self: Box, init: NextSJob) -> Box - where - NextSJob: StatefulJob + 'static, - { - let next_job_order = self.next_jobs.len() + 
1; - - let mut child_job_builder = JobBuilder::new(init).with_parent_id(self.id); - - if let Some(parent_report) = self.report() { - if let Some(parent_action) = &parent_report.action { - child_job_builder = - child_job_builder.with_action(format!("{parent_action}-{next_job_order}")); - } - } - - self.next_jobs.push_back(child_job_builder.build()); - - self - } - // this function returns an ingestible job instance from a job report pub fn new_from_report( mut report: JobReport, diff --git a/core/src/old_job/report.rs b/core/src/old_job/report.rs index 1e620290f902..00548056be4c 100644 --- a/core/src/old_job/report.rs +++ b/core/src/old_job/report.rs @@ -325,19 +325,4 @@ impl JobReportBuilder { parent_id: None, } } - - pub fn with_action(mut self, action: impl AsRef) -> Self { - self.action = Some(action.as_ref().to_string()); - self - } - - pub fn with_metadata(mut self, metadata: serde_json::Value) -> Self { - self.metadata = Some(metadata); - self - } - - pub fn with_parent_id(mut self, parent_id: Uuid) -> Self { - self.parent_id = Some(parent_id); - self - } } diff --git a/core/src/old_job/worker.rs b/core/src/old_job/worker.rs index 1405d952b8cf..7db01d70f2fd 100644 --- a/core/src/old_job/worker.rs +++ b/core/src/old_job/worker.rs @@ -307,7 +307,9 @@ impl Worker { let task_count = report.task_count as usize; let completed_task_count = report.completed_task_count as usize; let remaining_task_count = task_count.saturating_sub(completed_task_count); - let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32; // Adding 1 to avoid division by zero + + // Adding 1 to avoid division by zero + let remaining_time_per_task = elapsed / (completed_task_count + 1) as i32; let remaining_time = remaining_time_per_task * remaining_task_count as i32; // Update the report with estimated remaining time diff --git a/crates/p2p/crates/block/src/lib.rs b/crates/p2p/crates/block/src/lib.rs index 35a1af8ca542..7d6554dfdfcd 100644 --- a/crates/p2p/crates/block/src/lib.rs +++ b/crates/p2p/crates/block/src/lib.rs @@ -257,7 +257,7 @@ mod tests { tx.send(()).unwrap(); Transfer::new(&req, |_| {}, &Default::default()) .send(&mut client, file) - .await; + .await } }); @@ -266,7 +266,8 @@ mod tests { let mut result = Vec::new(); Transfer::new(&req, |_| {}, &Default::default()) .receive(&mut server, &mut result) - .await; + .await + .unwrap(); assert_eq!(result, data); } @@ -298,7 +299,7 @@ mod tests { tx.send(()).unwrap(); Transfer::new(&req, |_| {}, &Default::default()) .send(&mut client, file) - .await; + .await } }); @@ -307,7 +308,9 @@ mod tests { let mut result = Vec::new(); Transfer::new(&req, |_| {}, &Default::default()) .receive(&mut server, &mut result) - .await; + .await + .unwrap(); + assert_eq!(result, data); } @@ -339,14 +342,14 @@ mod tests { Transfer::new(&req, |_| {}, &Arc::new(AtomicBool::new(true))) .send(&mut client, file) - .await; + .await } }); rx.await.unwrap(); let mut result = Vec::new(); - Transfer::new(&req, |_| {}, &Default::default()) + let _ = Transfer::new(&req, |_| {}, &Default::default()) .receive(&mut server, &mut result) .await; assert_eq!(result, Vec::::new()); // Cancelled by sender so no data @@ -380,14 +383,14 @@ mod tests { Transfer::new(&req, |_| {}, &Default::default()) .send(&mut client, file) - .await; + .await } }); rx.await.unwrap(); let mut result = Vec::new(); - Transfer::new(&req, |_| {}, &Arc::new(AtomicBool::new(true))) + let _ = Transfer::new(&req, |_| {}, &Arc::new(AtomicBool::new(true))) .receive(&mut server, &mut result) .await; assert_eq!(result, 
Vec::::new()); // Cancelled by sender so no data @@ -422,14 +425,14 @@ mod tests { Transfer::new(&req, |_| {}, &Default::default()) .send(&mut client, file) - .await; + .await } }); rx.await.unwrap(); let mut result = Vec::new(); - Transfer::new(&req, |_| {}, &Default::default()) + let _ = Transfer::new(&req, |_| {}, &Default::default()) .receive(&mut server, &mut result) .await; assert_eq!(result, Vec::::new()); // Cancelled by sender so no data diff --git a/crates/sync/src/compressed.rs b/crates/sync/src/compressed.rs index bdcd523da58c..1056a68ad6f3 100644 --- a/crates/sync/src/compressed.rs +++ b/crates/sync/src/compressed.rs @@ -87,6 +87,7 @@ impl CompressedCRDTOperations { }) } + #[must_use] pub fn len(&self) -> usize { self.0 .iter() @@ -98,6 +99,11 @@ impl CompressedCRDTOperations { .sum::() } + #[must_use] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + pub fn into_ops(self) -> Vec { let mut ops = vec![]; diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index ab15e2f28ed4..8e23809d2777 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -167,6 +167,8 @@ export type CRDTOperationData = { c: { [key in string]: JsonValue } } | { u: { f export type CameraData = { device_make: string | null; device_model: string | null; color_space: string | null; color_profile: ColorProfile | null; focal_length: number | null; shutter_speed: number | null; flash: Flash | null; orientation: Orientation; lens_make: string | null; lens_model: string | null; bit_depth: number | null; zoom: number | null; iso: number | null; software: string | null; serial_number: string | null; lens_serial_number: string | null; contrast: number | null; saturation: number | null; sharpness: number | null; composite: Composite | null } +export type CasId = string + export type ChangeNodeNameArgs = { name: string | null; p2p_port: Port | null; p2p_ipv4_enabled: boolean | null; p2p_ipv6_enabled: boolean | null; p2p_discovery: P2PDiscoveryState | null; p2p_remote_access: boolean | null; image_labeler_version: string | null } export type Chapter = { id: number; start: [number, number]; end: [number, number]; time_base_den: number; time_base_num: number; metadata: Metadata } @@ -642,7 +644,7 @@ export type TextMatch = { contains: string } | { startsWith: string } | { endsWi * This type is used to pass the relevant data to the frontend so it can request the thumbnail. 
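The thumbnail's on-disk location is derived from these same fields; a minimal Rust sketch of that assembly, assuming the shard is a short prefix of the `cas_id` and a `.webp` output (the real scheme lives in the core's thumbnailer helpers):

use std::path::PathBuf;

// Sketch only: `base_directory`, the shard, and the `.webp` extension mirror the
// `ThumbKey` fields; the two-character shard prefix is an assumption for illustration.
fn thumbnail_path(base_directory: &str, cas_id: &str) -> PathBuf {
	let shard_hex = &cas_id[..2.min(cas_id.len())];
	PathBuf::from(base_directory)
		.join(shard_hex)
		.join(format!("{cas_id}.webp"))
}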
* Tt supports extending the shard hex to support deeper directory structures in the future */ -export type ThumbKey = { shard_hex: string; cas_id: string; base_directory_str: string } +export type ThumbKey = { shard_hex: string; cas_id: CasId; base_directory_str: string } export type UpdateThumbnailerPreferences = { background_processing_percentage: number } From c61fea5ad2ec22fc2ba46bbb4cf89e6e702d5fad Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Thu, 30 May 2024 03:17:51 -0300 Subject: [PATCH 24/33] Bridging old and new job systems --- .../heavy-lifting/src/job_system/job.rs | 9 +- .../heavy-lifting/src/job_system/report.rs | 100 +++++++-- .../heavy-lifting/src/job_system/runner.rs | 10 +- .../heavy-lifting/src/job_system/store.rs | 3 + core/src/api/jobs.rs | 69 ++++-- core/src/old_job/manager.rs | 10 +- core/src/old_job/mod.rs | 12 +- core/src/old_job/report.rs | 196 ++++++++++++++++-- core/src/old_job/worker.rs | 22 +- packages/client/src/core.ts | 4 +- packages/client/src/utils/jobs/useJobInfo.tsx | 65 +++--- 11 files changed, 381 insertions(+), 119 deletions(-) diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index f4eb39436ae9..65b4935eb1ab 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -52,6 +52,11 @@ pub enum JobName { FileIdentifier, MediaProcessor, // TODO: Add more job names as needed + Copy, + Move, + Delete, + Erase, + FileValidator, } #[derive(Debug)] @@ -261,9 +266,7 @@ impl JobOutput { ); } - report - .metadata - .extend(metadata.into_iter().map(ReportMetadata::Output)); + report.metadata.extend(metadata.into_iter().map(Into::into)); report.completed_at = Some(Utc::now()); diff --git a/core/crates/heavy-lifting/src/job_system/report.rs b/core/crates/heavy-lifting/src/job_system/report.rs index 80faf458bb95..997d965dc53b 100644 --- a/core/crates/heavy-lifting/src/job_system/report.rs +++ b/core/crates/heavy-lifting/src/job_system/report.rs @@ -1,6 +1,6 @@ use crate::NonCriticalError; -use sd_prisma::prisma::{job, location, PrismaClient}; +use sd_prisma::prisma::{file_path, job, location, PrismaClient}; use sd_utils::db::{maybe_missing, MissingFieldError}; use std::{collections::HashMap, fmt, path::PathBuf, str::FromStr}; @@ -86,6 +86,43 @@ pub enum ReportOutputMetadata { thumbnails_generated: (u32, u32), thumbnails_skipped: (u32, u32), }, + Copier { + source_location_id: location::id::Type, + target_location_id: location::id::Type, + sources_file_path_ids: Vec, + target_location_relative_directory_path: PathBuf, + }, + Mover { + source_location_id: location::id::Type, + target_location_id: location::id::Type, + sources_file_path_ids: Vec, + target_location_relative_directory_path: PathBuf, + }, + Deleter { + location_id: location::id::Type, + file_path_ids: Vec, + }, + Eraser { + location_id: location::id::Type, + file_path_ids: Vec, + passes: u32, + }, + FileValidator { + location_id: location::id::Type, + sub_path: Option, + }, +} + +impl From for ReportMetadata { + fn from(value: ReportInputMetadata) -> Self { + Self::Input(value) + } +} + +impl From for ReportMetadata { + fn from(value: ReportOutputMetadata) -> Self { + Self::Output(value) + } } #[derive(Debug, Serialize, Type, Clone)] @@ -127,38 +164,53 @@ impl fmt::Display for Report { impl TryFrom for Report { type Error = ReportError; - fn try_from(data: job::Data) -> Result { + fn try_from( + job::Data { + id, + name, + action, + status, + errors_text: _, // Deprecated + 
critical_error, + non_critical_errors, + data: _, // Deprecated + metadata, + parent_id, + task_count, + completed_task_count, + date_estimated_completion, + date_created, + date_started, + date_completed, + .. + }: job::Data, + ) -> Result { Ok(Self { - id: JobId::from_slice(&data.id).expect("corrupted database"), - name: JobName::from_str(&maybe_missing(data.name, "job.name")?)?, - action: data.action, - - metadata: if let Some(metadata) = data.metadata { + id: JobId::from_slice(&id).expect("corrupted database"), + name: JobName::from_str(&maybe_missing(name, "job.name")?)?, + action, + metadata: if let Some(metadata) = metadata { serde_json::from_slice(&metadata)? } else { vec![] }, - critical_error: data.critical_error, - non_critical_errors: if let Some(non_critical_errors) = data.non_critical_errors { + critical_error, + non_critical_errors: if let Some(non_critical_errors) = non_critical_errors { serde_json::from_slice(&non_critical_errors)? } else { vec![] }, - created_at: data.date_created.map(DateTime::into), - started_at: data.date_started.map(DateTime::into), - completed_at: data.date_completed.map(DateTime::into), - parent_id: data - .parent_id - .map(|id| JobId::from_slice(&id).expect("corrupted database")), - status: Status::try_from(maybe_missing(data.status, "job.status")?) + created_at: date_created.map(DateTime::into), + started_at: date_started.map(DateTime::into), + completed_at: date_completed.map(DateTime::into), + parent_id: parent_id.map(|id| JobId::from_slice(&id).expect("corrupted database")), + status: Status::try_from(maybe_missing(status, "job.status")?) .expect("corrupted database"), - task_count: data.task_count.unwrap_or(0), - completed_task_count: data.completed_task_count.unwrap_or(0), + task_count: task_count.unwrap_or(0), + completed_task_count: completed_task_count.unwrap_or(0), phase: String::new(), message: String::new(), - estimated_completion: data - .date_estimated_completion - .map_or_else(Utc::now, DateTime::into), + estimated_completion: date_estimated_completion.map_or_else(Utc::now, DateTime::into), }) } } @@ -186,6 +238,10 @@ impl Report { } } + pub fn push_metadata(&mut self, metadata: ReportOutputMetadata) { + self.metadata.push(metadata.into()); + } + #[must_use] pub fn get_action_name_and_group_key(&self) -> (String, Option) { // actions are formatted like "added_location" or "added_location-1" @@ -355,7 +411,7 @@ impl ReportBuilder { #[must_use] pub fn with_metadata(mut self, metadata: ReportInputMetadata) -> Self { - self.metadata.push(ReportMetadata::Input(metadata)); + self.metadata.push(metadata.into()); self } diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index 75b81243aa40..9b92f9097a1f 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -31,7 +31,7 @@ use uuid::Uuid; use super::{ job::{DynJob, JobHandle, JobName, JobOutput, OuterContext, ReturnStatus}, - report::{self, ReportMetadata, ReportOutputMetadata}, + report::{self, ReportOutputMetadata}, store::{StoredJob, StoredJobEntry}, Command, JobId, JobSystemError, SerializedTasks, }; @@ -293,10 +293,10 @@ impl> JobSystemRunner { diff --git a/core/crates/heavy-lifting/src/job_system/store.rs b/core/crates/heavy-lifting/src/job_system/store.rs index bdd442e119c5..97544a9144b8 100644 --- a/core/crates/heavy-lifting/src/job_system/store.rs +++ b/core/crates/heavy-lifting/src/job_system/store.rs @@ -200,6 +200,9 @@ macro_rules! 
match_deserialize_job { } )) .map_err(Into::into),)+ + + // TODO(fogodev): this is temporary until we can get rid of the old job system + _ => unimplemented!("Job not implemented"), } }}; } diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 178748de28d0..3300809c69cd 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -3,12 +3,12 @@ use crate::{ invalidate_query, location::{find_location, LocationError}, object::validation::old_validator_job::OldObjectValidatorJobInit, - old_job::{JobStatus, OldJob, OldJobs}, + old_job::{JobStatus, OldJob, OldJobReport}, }; use sd_core_heavy_lifting::{ file_identifier::FileIdentifier, job_system::report, media_processor::job::MediaProcessor, - JobId, Report, + JobId, JobSystemError, Report, }; use sd_prisma::prisma::{job, location, SortOrder}; @@ -101,16 +101,30 @@ pub(crate) fn mount() -> AlphaRouter { .exec() .await? .into_iter() - .flat_map(Report::try_from) + .flat_map(|job| { + if let Ok(report) = Report::try_from(job.clone()) { + Some(report) + } else { + // TODO(fogodev): this is a temporary fix for the old job system + OldJobReport::try_from(job).map(Into::into).ok() + } + }) .collect(); - let active_reports_by_id = node.job_system.get_active_reports().await; + let mut active_reports_by_id = node.job_system.get_active_reports().await; + active_reports_by_id.extend( + node.old_jobs + .get_active_reports_with_id() + .await + .into_iter() + .map(|(id, old_report)| (id, old_report.into())), + ); for job in job_reports { // action name and group key are computed from the job data let (action_name, group_key) = job.get_action_name_and_group_key(); - trace!("job {job:#?}, action_name {action_name}, group_key {group_key:?}",); + trace!(?job, %action_name, ?group_key); // if the job is running, use the in-memory report let report = active_reports_by_id.get(&job.id).unwrap_or(&job); @@ -211,30 +225,53 @@ pub(crate) fn mount() -> AlphaRouter { // pause job .procedure("pause", { R.with2(library()) - .mutation(|(node, library), id: Uuid| async move { - let ret = OldJobs::pause(&node.old_jobs, id).await.map_err(Into::into); + .mutation(|(node, library), id: JobId| async move { + if let Err(e) = node.job_system.pause(id).await { + if matches!(e, JobSystemError::NotFound(_)) { + // If the job is not found, it can be a job from the old job system + node.old_jobs.pause(id).await?; + } else { + return Err(e.into()); + } + } + invalidate_query!(library, "jobs.reports"); - ret + + Ok(()) }) }) .procedure("resume", { R.with2(library()) .mutation(|(node, library), id: Uuid| async move { - let ret = OldJobs::resume(&node.old_jobs, id) - .await - .map_err(Into::into); + if let Err(e) = node.job_system.resume(id).await { + if matches!(e, JobSystemError::NotFound(_)) { + // If the job is not found, it can be a job from the old job system + node.old_jobs.resume(id).await?; + } else { + return Err(e.into()); + } + } + invalidate_query!(library, "jobs.reports"); - ret + + Ok(()) }) }) .procedure("cancel", { R.with2(library()) .mutation(|(node, library), id: Uuid| async move { - let ret = OldJobs::cancel(&node.old_jobs, id) - .await - .map_err(Into::into); + if let Err(e) = node.job_system.cancel(id).await { + if matches!(e, JobSystemError::NotFound(_)) { + // If the job is not found, it can be a job from the old job system + node.old_jobs.cancel(id).await?; + } else { + return Err(e.into()); + } + } + invalidate_query!(library, "jobs.reports"); - ret + + Ok(()) }) }) .procedure("generateThumbsForLocation", { diff --git a/core/src/old_job/manager.rs 
b/core/src/old_job/manager.rs index 586811aee6d3..87fdd01adaf9 100644 --- a/core/src/old_job/manager.rs +++ b/core/src/old_job/manager.rs @@ -24,7 +24,7 @@ use tokio::sync::{mpsc, oneshot, RwLock}; use tracing::{debug, error, info, warn}; use uuid::Uuid; -use super::{JobIdentity, JobManagerError, JobReport, JobStatus, StatefulJob}; +use super::{JobIdentity, JobManagerError, JobStatus, OldJobReport, StatefulJob}; const MAX_WORKERS: usize = 5; @@ -281,7 +281,7 @@ impl OldJobs { .exec() .await? .into_iter() - .map(JobReport::try_from); + .map(OldJobReport::try_from); for job in all_jobs { let job = job?; @@ -315,7 +315,7 @@ impl OldJobs { } // get all active jobs, including paused jobs organized by job id - pub async fn get_active_reports_with_id(&self) -> HashMap { + pub async fn get_active_reports_with_id(&self) -> HashMap { self.running_workers .read() .await @@ -328,7 +328,7 @@ impl OldJobs { } // get all running jobs, excluding paused jobs organized by action - pub async fn get_running_reports(&self) -> HashMap { + pub async fn get_running_reports(&self) -> HashMap { self.running_workers .read() .await @@ -378,7 +378,7 @@ mod macros { } /// This function is used to initialize a DynJob from a job report. fn initialize_resumable_job( - job_report: JobReport, + job_report: OldJobReport, next_jobs: Option>>, ) -> Result, JobError> { dispatch_call_to_job_by_name!( diff --git a/core/src/old_job/mod.rs b/core/src/old_job/mod.rs index d9d8a525712f..c27e3fabd7f5 100644 --- a/core/src/old_job/mod.rs +++ b/core/src/old_job/mod.rs @@ -133,8 +133,8 @@ pub trait StatefulJob: pub trait DynJob: Send + Sync { fn id(&self) -> Uuid; fn parent_id(&self) -> Option; - fn report(&self) -> &Option; - fn report_mut(&mut self) -> &mut Option; + fn report(&self) -> &Option; + fn report_mut(&mut self) -> &mut Option; fn name(&self) -> &'static str; async fn run( &mut self, @@ -152,7 +152,7 @@ pub trait DynJob: Send + Sync { pub struct OldJob { id: Uuid, hash: u64, - report: Option, + report: Option, state: Option>, next_jobs: VecDeque>, } @@ -177,7 +177,7 @@ impl OldJob { // this function returns an ingestible job instance from a job report pub fn new_from_report( - mut report: JobReport, + mut report: OldJobReport, next_jobs: Option>>, ) -> Result, JobError> { let state = rmp_serde::from_slice::>( @@ -393,11 +393,11 @@ impl DynJob for OldJob { self.report.as_ref().and_then(|r| r.parent_id) } - fn report(&self) -> &Option { + fn report(&self) -> &Option { &self.report } - fn report_mut(&mut self) -> &mut Option { + fn report_mut(&mut self) -> &mut Option { &mut self.report } diff --git a/core/src/old_job/report.rs b/core/src/old_job/report.rs index 00548056be4c..ee95035105f4 100644 --- a/core/src/old_job/report.rs +++ b/core/src/old_job/report.rs @@ -1,14 +1,20 @@ -use crate::library::Library; +use crate::{ + library::Library, + object::{ + fs::{ + old_copy::OldFileCopierJobInit, old_cut::OldFileCutterJobInit, + old_delete::OldFileDeleterJobInit, old_erase::OldFileEraserJobInit, + }, + validation::old_validator_job::OldObjectValidatorJobInit, + }, +}; use sd_core_prisma_helpers::job_without_data; use sd_prisma::prisma::job; use sd_utils::db::{maybe_missing, MissingFieldError}; -use std::{ - collections::HashMap, - fmt::{Display, Formatter}, -}; +use std::fmt::{Display, Formatter}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; @@ -26,15 +32,15 @@ pub enum JobReportUpdate { Phase(String), } -#[derive(Debug, Serialize, Deserialize, Type, Clone)] -pub struct JobReport { +#[derive(Debug, Serialize, 
Deserialize, Clone)] +pub struct OldJobReport { pub id: Uuid, pub name: String, pub action: Option, pub data: Option>, - // In Typescript `any | null` is just `any` so we don't get prompted for null checks - // TODO(@Oscar): This will be fixed - #[specta(type = Option>)] + // // In Typescript `any | null` is just `any` so we don't get prompted for null checks + // // TODO(@Oscar): This will be fixed + // #[specta(type = Option>)] pub metadata: Option, pub errors_text: Vec, @@ -53,7 +59,150 @@ pub struct JobReport { pub estimated_completion: DateTime, } -impl Display for JobReport { +impl From for sd_core_heavy_lifting::job_system::report::Report { + fn from( + OldJobReport { + id, + name, + action, + data: _, // Not used in the new job system + metadata, + errors_text: _, // New job system uses type-safe errors + created_at, + started_at, + completed_at, + parent_id, + status, + task_count, + completed_task_count, + phase, + message, + estimated_completion, + }: OldJobReport, + ) -> Self { + use sd_core_heavy_lifting::{job_system::report::ReportOutputMetadata, JobName}; + + let mut new_metadata = Vec::new(); + + if let Some(metadata) = metadata { + if let Some(metadata) = metadata.as_object() { + if let Some(metadata) = metadata.get("output") { + if let Some(metadata) = metadata.as_object() { + if let Some(metadata) = metadata.get("init") { + if let Ok(OldFileCopierJobInit { + source_location_id, + target_location_id, + sources_file_path_ids, + target_location_relative_directory_path, + }) = serde_json::from_value::(metadata.clone()) + { + new_metadata.push( + ReportOutputMetadata::Copier { + source_location_id, + target_location_id, + sources_file_path_ids, + target_location_relative_directory_path, + } + .into(), + ); + } else if let Ok(OldFileCutterJobInit { + source_location_id, + target_location_id, + sources_file_path_ids, + target_location_relative_directory_path, + }) = + serde_json::from_value::(metadata.clone()) + { + new_metadata.push( + ReportOutputMetadata::Mover { + source_location_id, + target_location_id, + sources_file_path_ids, + target_location_relative_directory_path, + } + .into(), + ); + } else if let Ok(OldFileDeleterJobInit { + location_id, + file_path_ids, + }) = + serde_json::from_value::(metadata.clone()) + { + new_metadata.push( + ReportOutputMetadata::Deleter { + location_id, + file_path_ids, + } + .into(), + ); + } else if let Ok(OldFileEraserJobInit { + location_id, + file_path_ids, + passes, + }) = + serde_json::from_value::(metadata.clone()) + { + new_metadata.push( + ReportOutputMetadata::Eraser { + location_id, + file_path_ids, + passes: passes as u32, + } + .into(), + ); + } else if let Ok(OldObjectValidatorJobInit { location, sub_path }) = + serde_json::from_value::( + metadata.clone(), + ) { + new_metadata.push( + ReportOutputMetadata::FileValidator { + location_id: location.id, + sub_path, + } + .into(), + ); + } + } + } + } + } + } + + Self { + id, + name: match name.as_str() { + "file_copier" => JobName::Copy, + "file_cutter" => JobName::Move, + "file_deleter" => JobName::Delete, + "file_eraser" => JobName::Erase, + "object_validator" => JobName::FileValidator, + + // Already implemented in the new job system + "indexer" => JobName::Indexer, + "file_identifier" => JobName::FileIdentifier, + "media_processor" => JobName::MediaProcessor, + + unexpected_job => unimplemented!("Job {unexpected_job} not implemented"), + }, + action, + metadata: new_metadata, + critical_error: None, + non_critical_errors: Vec::new(), + created_at, + started_at, + 
completed_at, + parent_id, + status: status.into(), + task_count, + completed_task_count, + phase, + message, + estimated_completion, + } + } +} + +impl Display for OldJobReport { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, @@ -64,7 +213,7 @@ impl Display for JobReport { } // convert database struct into a resource struct -impl TryFrom for JobReport { +impl TryFrom for OldJobReport { type Error = MissingFieldError; fn try_from(data: job::Data) -> Result { @@ -105,7 +254,7 @@ impl TryFrom for JobReport { // I despise having to write this twice, but it seems to be the only way to // remove the data field from the struct // would love to get this DRY'd up -impl TryFrom for JobReport { +impl TryFrom for OldJobReport { type Error = MissingFieldError; fn try_from(data: job_without_data::Data) -> Result { @@ -144,7 +293,7 @@ impl TryFrom for JobReport { } } -impl JobReport { +impl OldJobReport { pub fn new(uuid: Uuid, name: String) -> Self { Self { id: uuid, @@ -286,6 +435,21 @@ impl TryFrom for JobStatus { } } +// TODO(fogodev): this is temporary until we can get rid of the old job system +impl From for sd_core_heavy_lifting::job_system::report::Status { + fn from(value: JobStatus) -> Self { + match value { + JobStatus::Queued => Self::Queued, + JobStatus::Running => Self::Running, + JobStatus::Completed => Self::Completed, + JobStatus::Canceled => Self::Canceled, + JobStatus::Failed => Self::Failed, + JobStatus::Paused => Self::Paused, + JobStatus::CompletedWithErrors => Self::CompletedWithErrors, + } + } +} + pub struct JobReportBuilder { pub id: Uuid, pub name: String, @@ -295,8 +459,8 @@ pub struct JobReportBuilder { } impl JobReportBuilder { - pub fn build(self) -> JobReport { - JobReport { + pub fn build(self) -> OldJobReport { + OldJobReport { id: self.id, name: self.name, action: self.action, diff --git a/core/src/old_job/worker.rs b/core/src/old_job/worker.rs index 7db01d70f2fd..28ecaf173adf 100644 --- a/core/src/old_job/worker.rs +++ b/core/src/old_job/worker.rs @@ -28,8 +28,8 @@ use tracing::{debug, error, info, trace, warn}; use uuid::Uuid; use super::{ - DynJob, JobError, JobIdentity, JobReport, JobReportUpdate, JobRunErrors, JobRunOutput, - JobStatus, OldJobs, + DynJob, JobError, JobIdentity, JobReportUpdate, JobRunErrors, JobRunOutput, JobStatus, + OldJobReport, OldJobs, }; const FIVE_SECS: Duration = Duration::from_secs(5); @@ -114,8 +114,8 @@ impl WorkerContext { pub struct Worker { pub(super) library_id: Uuid, commands_tx: chan::Sender, - report_watch_tx: Arc>, - report_watch_rx: watch::Receiver, + report_watch_tx: Arc>, + report_watch_rx: watch::Receiver, paused: AtomicBool, } @@ -123,7 +123,7 @@ impl Worker { pub async fn new( id: Uuid, mut job: Box, - mut report: JobReport, + mut report: OldJobReport, library: Arc, node: Arc, job_manager: Arc, @@ -255,7 +255,7 @@ impl Worker { } } - pub fn report(&self) -> JobReport { + pub fn report(&self) -> OldJobReport { self.report_watch_rx.borrow().clone() } @@ -264,9 +264,9 @@ impl Worker { } fn track_progress( - report: &mut JobReport, + report: &mut OldJobReport, last_report_watch_update: &mut Instant, - report_watch_tx: &watch::Sender, + report_watch_tx: &watch::Sender, start_time: DateTime, updates: Vec, library: &Library, @@ -348,7 +348,7 @@ impl Worker { hash, mut report, }: JobWorkTable, - report_watch_tx: Arc>, + report_watch_tx: Arc>, start_time: DateTime, (commands_tx, commands_rx): (chan::Sender, chan::Receiver), library: Arc, @@ -505,7 +505,7 @@ impl Worker { async fn process_job_output( mut job: 
Box, job_result: Result, - report: &mut JobReport, + report: &mut OldJobReport, library: &Library, ) -> Option> { // Run the job and handle the result @@ -651,7 +651,7 @@ struct JobWorkTable { job: Box, manager: Arc, hash: u64, - report: JobReport, + report: OldJobReport, } fn invalidate_queries(library: &Library) { diff --git a/packages/client/src/core.ts b/packages/client/src/core.ts index 8e23809d2777..6e466adc793f 100644 --- a/packages/client/src/core.ts +++ b/packages/client/src/core.ts @@ -377,7 +377,7 @@ export type InvalidateOperationEvent = { type: "single"; data: SingleInvalidateO export type JobGroup = { id: string; action: string | null; status: Status; created_at: string; jobs: Report[] } -export type JobName = "Indexer" | "FileIdentifier" | "MediaProcessor" +export type JobName = "Indexer" | "FileIdentifier" | "MediaProcessor" | "Copy" | "Move" | "Delete" | "Erase" | "FileValidator" export type JobProgressEvent = { id: string; library_id: string; task_count: number; completed_task_count: number; phase: string; message: string; estimated_completion: string } @@ -580,7 +580,7 @@ export type ReportInputMetadata = { type: "location"; data: Location } | { type: export type ReportMetadata = { type: "input"; metadata: ReportInputMetadata } | { type: "output"; metadata: ReportOutputMetadata } -export type ReportOutputMetadata = { type: "metrics"; data: { [key in string]: JsonValue } } | { type: "indexer"; data: { total_paths: [number, number] } } | { type: "file_identifier"; data: { total_orphan_paths: [number, number]; total_objects_created: [number, number]; total_objects_linked: [number, number] } } | { type: "media_processor"; data: { media_data_extracted: [number, number]; media_data_skipped: [number, number]; thumbnails_generated: [number, number]; thumbnails_skipped: [number, number] } } +export type ReportOutputMetadata = { type: "metrics"; data: { [key in string]: JsonValue } } | { type: "indexer"; data: { total_paths: [number, number] } } | { type: "file_identifier"; data: { total_orphan_paths: [number, number]; total_objects_created: [number, number]; total_objects_linked: [number, number] } } | { type: "media_processor"; data: { media_data_extracted: [number, number]; media_data_skipped: [number, number]; thumbnails_generated: [number, number]; thumbnails_skipped: [number, number] } } | { type: "copier"; data: { source_location_id: number; target_location_id: number; sources_file_path_ids: number[]; target_location_relative_directory_path: string } } | { type: "mover"; data: { source_location_id: number; target_location_id: number; sources_file_path_ids: number[]; target_location_relative_directory_path: string } } | { type: "deleter"; data: { location_id: number; file_path_ids: number[] } } | { type: "eraser"; data: { location_id: number; file_path_ids: number[]; passes: number } } | { type: "file_validator"; data: { location_id: number; sub_path: string | null } } export type RescanArgs = { location_id: number; sub_path: string } diff --git a/packages/client/src/utils/jobs/useJobInfo.tsx b/packages/client/src/utils/jobs/useJobInfo.tsx index 91544350f590..7dce8598fb7f 100644 --- a/packages/client/src/utils/jobs/useJobInfo.tsx +++ b/packages/client/src/utils/jobs/useJobInfo.tsx @@ -232,39 +232,38 @@ export function useJobInfo(job: Report, realtimeUpdate: JobProgressEvent | null) }; } - // TODO(fogodev): put these back in when they're implemented - // case 'file_copier': - // return { - // ...data, - // name: `${isQueued ? 'Copy' : isRunning ? 
'Copying' : 'Copied'} ${ - // isRunning ? completedTaskCount + 1 : completedTaskCount - // } ${isRunning ? `of ${job.task_count}` : ``} ${plural(job.task_count, 'file')}`, - // textItems: [[{ text: job.status }]] - // }; - // case 'file_deleter': - // return { - // ...data, - // name: `${ - // isQueued ? 'Delete' : isRunning ? 'Deleting' : 'Deleted' - // } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, - // textItems: [[{ text: job.status }]] - // }; - // case 'file_cutter': - // return { - // ...data, - // name: `${ - // isQueued ? 'Cut' : isRunning ? 'Cutting' : 'Cut' - // } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, - // textItems: [[{ text: job.status }]] - // }; - // case 'object_validator': - // return { - // ...data, - // name: `${isQueued ? 'Validate' : isRunning ? 'Validating' : 'Validated'} ${ - // !isQueued ? completedTaskCount : '' - // } ${plural(completedTaskCount, 'object')}`, - // textItems: [[{ text: job.status }]] - // }; + case 'Copy': + return { + ...data, + name: `${isQueued ? 'Copy' : isRunning ? 'Copying' : 'Copied'} ${ + isRunning ? completedTaskCount + 1 : completedTaskCount + } ${isRunning ? `of ${job.task_count}` : ``} ${plural(job.task_count, 'file')}`, + textItems: [[{ text: job.status }]] + }; + case 'Delete': + return { + ...data, + name: `${ + isQueued ? 'Delete' : isRunning ? 'Deleting' : 'Deleted' + } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, + textItems: [[{ text: job.status }]] + }; + case 'Move': + return { + ...data, + name: `${ + isQueued ? 'Cut' : isRunning ? 'Cutting' : 'Cut' + } ${completedTaskCount} ${plural(completedTaskCount, 'file')}`, + textItems: [[{ text: job.status }]] + }; + case 'FileValidator': + return { + ...data, + name: `${isQueued ? 'Validate' : isRunning ? 'Validating' : 'Validated'} ${ + !isQueued ? 
completedTaskCount : '' + } ${plural(completedTaskCount, 'file')}`, + textItems: [[{ text: job.status }]] + }; default: return { ...data, From 852bb10a62db8f1213d71987c40d9157540d8605 Mon Sep 17 00:00:00 2001 From: Ericson Soares Date: Tue, 4 Jun 2024 03:07:36 -0300 Subject: [PATCH 25/33] A ton of fixes --- Cargo.lock | 22 +- Cargo.toml | 4 +- apps/desktop/src-tauri/src/main.rs | 2 +- apps/mobile/src/components/job/JobGroup.tsx | 105 +++-- .../heavy-lifting/src/file_identifier/job.rs | 130 ++++-- .../heavy-lifting/src/file_identifier/mod.rs | 15 +- .../src/file_identifier/shallow.rs | 30 +- core/crates/heavy-lifting/src/indexer/job.rs | 164 +++++--- core/crates/heavy-lifting/src/indexer/mod.rs | 2 +- .../heavy-lifting/src/indexer/shallow.rs | 17 +- .../src/indexer/tasks/walker/mod.rs | 112 +++-- .../src/indexer/tasks/walker/save_state.rs | 11 +- .../heavy-lifting/src/job_system/error.rs | 25 +- .../heavy-lifting/src/job_system/job.rs | 369 +++++++++++++---- .../heavy-lifting/src/job_system/mod.rs | 53 ++- .../heavy-lifting/src/job_system/report.rs | 12 +- .../heavy-lifting/src/job_system/runner.rs | 166 ++++---- .../heavy-lifting/src/job_system/utils.rs | 27 +- core/crates/heavy-lifting/src/lib.rs | 4 + .../heavy-lifting/src/media_processor/job.rs | 75 ++-- .../src/media_processor/shallow.rs | 21 +- .../heavy-lifting/src/utils/sub_path.rs | 105 +++-- core/src/api/jobs.rs | 42 +- core/src/context.rs | 7 +- core/src/lib.rs | 6 +- core/src/location/manager/mod.rs | 6 +- core/src/location/manager/watcher/utils.rs | 3 +- core/src/location/mod.rs | 83 ++-- core/src/util/debug_initializer.rs | 3 +- crates/task-system/Cargo.toml | 1 + crates/task-system/src/message.rs | 17 +- crates/task-system/src/system.rs | 382 ++++++++++++------ crates/task-system/src/task.rs | 375 ++++++++++------- crates/task-system/src/worker/mod.rs | 30 +- crates/task-system/src/worker/run.rs | 19 +- crates/task-system/src/worker/runner.rs | 180 +++++---- crates/task-system/tests/common/actors.rs | 8 +- crates/task-system/tests/common/jobs.rs | 4 +- crates/task-system/tests/common/tasks.rs | 67 ++- crates/task-system/tests/integration_test.rs | 86 +++- .../Layout/Sidebar/JobManager/JobGroup.tsx | 16 +- packages/client/src/core.ts | 2 +- packages/client/src/utils/jobs/useJobInfo.tsx | 29 +- 43 files changed, 1856 insertions(+), 981 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b042dea7fee5..7f9cd489344d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4756,6 +4756,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "0.4.8" @@ -9106,7 +9115,7 @@ dependencies = [ "icrate", "image", "int-enum", - "itertools 0.12.1", + "itertools 0.13.0", "libc", "mini-moka", "normpath", @@ -9200,7 +9209,7 @@ dependencies = [ "futures-concurrency", "globset", "image", - "itertools 0.12.1", + "itertools 0.13.0", "lending-stream", "once_cell", "prisma-client-rust", @@ -9654,6 +9663,7 @@ dependencies = [ "tokio", "tokio-stream", "tracing", + "tracing-subscriber", "tracing-test", "uuid", ] @@ -11229,9 +11239,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.37.0" +version = "1.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" +checksum = "ba4f4a02a7a80d6f274636f0aa95c7e383b912d41fe721a31f29e29698585a4a" dependencies = [ "backtrace", "bytes", @@ -11249,9 +11259,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 78bebc482156..0df556d3ac42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,7 @@ globset = "0.4.14" hex = "0.4.3" http = "0.2.9" image = "0.25.1" -itertools = "0.12.1" +itertools = "0.13.0" lending-stream = "1.0.0" libc = "0.2.154" normpath = "1.2.0" @@ -76,7 +76,7 @@ strum = "0.26.2" strum_macros = "0.26.2" tempfile = "3.10.1" thiserror = "1.0.60" -tokio = "1.37.0" +tokio = "1.38.0" tokio-stream = "0.1.15" tokio-util = "0.7.11" tracing = "0.1.40" diff --git a/apps/desktop/src-tauri/src/main.rs b/apps/desktop/src-tauri/src/main.rs index 0491784c28a4..b676fffb690d 100644 --- a/apps/desktop/src-tauri/src/main.rs +++ b/apps/desktop/src-tauri/src/main.rs @@ -32,7 +32,7 @@ async fn app_ready(app_handle: AppHandle) { #[tauri::command(async)] #[specta::specta] -// If this erorrs, we don't have FDA and we need to re-prompt for it +// If this errors, we don't have FDA and we need to re-prompt for it async fn request_fda_macos() { DiskAccess::request_fda().expect("Unable to request full disk access"); } diff --git a/apps/mobile/src/components/job/JobGroup.tsx b/apps/mobile/src/components/job/JobGroup.tsx index 0a9c68a128d4..84fea849ccfa 100644 --- a/apps/mobile/src/components/job/JobGroup.tsx +++ b/apps/mobile/src/components/job/JobGroup.tsx @@ -1,4 +1,9 @@ import { Folder } from '@sd/assets/icons'; +import dayjs from 'dayjs'; +import { DotsThreeVertical, Eye, Pause, Play, Stop, Trash } from 'phosphor-react-native'; +import { SetStateAction, useMemo, useState } from 'react'; +import { Animated, Pressable, View } from 'react-native'; +import { Swipeable } from 'react-native-gesture-handler'; import { getJobNiceActionName, getTotalTasks, @@ -9,11 +14,6 @@ import { useRspcLibraryContext, useTotalElapsedTimeText } from '@sd/client'; -import dayjs from 'dayjs'; -import { DotsThreeVertical, Eye, Pause, Play, Stop, Trash } from 'phosphor-react-native'; -import { SetStateAction, useMemo, useState } from 'react'; -import { Animated, Pressable, View } from 'react-native'; -import { Swipeable } from 'react-native-gesture-handler'; import { tw, twStyle } from '~/lib/tailwind'; import { AnimatedHeight } from '../animation/layout'; @@ -64,7 +64,12 @@ export default function ({ group, progress }: JobGroupProps) { { transform: [{ translateX: translate }] } ]} > - + ); }; @@ -164,19 +169,17 @@ interface OptionsProps { activeJob?: Report; group: JobGroup; showChildJobs: boolean; - setShowChildJobs: React.Dispatch> + setShowChildJobs: React.Dispatch>; } function Options({ activeJob, group, setShowChildJobs, showChildJobs }: OptionsProps) { - const rspc = useRspcLibraryContext(); - const clearJob = useLibraryMutation( - ['jobs.clear'], { - onSuccess: () => { - rspc.queryClient.invalidateQueries(['jobs.reports']); - } - }) + const clearJob = useLibraryMutation(['jobs.clear'], { + onSuccess: () => { + rspc.queryClient.invalidateQueries(['jobs.reports']); + } + }); const resumeJob = useLibraryMutation( ['jobs.resume'], @@ 
-200,8 +203,7 @@ function Options({ activeJob, group, setShowChildJobs, showChildJobs }: OptionsP group.jobs.forEach((job) => { clearJob.mutate(job.id); //only one toast for all jobs - if (job.id === group.id) - toast.success('Job has been removed'); + if (job.id === group.id) toast.success('Job has been removed'); }); }; @@ -209,35 +211,68 @@ function Options({ activeJob, group, setShowChildJobs, showChildJobs }: OptionsP <> {/* Resume */} {(group.status === 'Queued' || group.status === 'Paused' || isJobPaused) && ( - )} {/* TODO: This should remove the job from panel */} - {!activeJob !== undefined ? ( - - - - } - > - setShowChildJobs(!showChildJobs)} - text="Expand" icon={Eye}/> - - - ) : ( + {activeJob !== undefined ? ( - - + ) : ( + + + + } + > + setShowChildJobs(!showChildJobs)} + text="Expand" + icon={Eye} + /> + + )} ); diff --git a/core/crates/heavy-lifting/src/file_identifier/job.rs b/core/crates/heavy-lifting/src/file_identifier/job.rs index beef1650122c..4217471d1461 100644 --- a/core/crates/heavy-lifting/src/file_identifier/job.rs +++ b/core/crates/heavy-lifting/src/file_identifier/job.rs @@ -4,7 +4,7 @@ use crate::{ job::{Job, JobReturn, JobTaskDispatcher, ReturnStatus}, report::ReportOutputMetadata, utils::cancel_pending_tasks, - SerializableJob, SerializedTasks, + JobCanceledError, JobErrorOrJobCanceledError, SerializableJob, SerializedTasks, }, utils::sub_path::maybe_get_iso_file_path_from_sub_path, Error, JobContext, JobName, LocationScanState, NonCriticalError, OuterContext, ProgressUpdate, @@ -47,19 +47,21 @@ use super::{ #[derive(Debug, Serialize, Deserialize, Clone, Copy)] enum Phase { + SearchingOrphans, IdentifyingFiles, ProcessingObjects, } impl Default for Phase { fn default() -> Self { - Self::IdentifyingFiles + Self::SearchingOrphans } } impl fmt::Display for Phase { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { + Self::SearchingOrphans => write!(f, "searching_orphans"), Self::IdentifyingFiles => write!(f, "identifying_files"), Self::ProcessingObjects => write!(f, "processing_objects"), } @@ -113,7 +115,7 @@ impl Job for FileIdentifier { ctx: &impl JobContext, SerializedTasks(serialized_tasks): SerializedTasks, ) -> Result<(), Error> { - self.pending_tasks_on_resume = dispatcher + if let Ok(tasks) = dispatcher .dispatch_many_boxed( rmp_serde::from_slice::)>>(&serialized_tasks) .map_err(file_identifier::Error::from)? @@ -140,7 +142,12 @@ impl Job for FileIdentifier { .await .map_err(file_identifier::Error::from)?, ) - .await; + .await + { + self.pending_tasks_on_resume = tasks; + } else { + warn!("Failed to dispatch tasks to resume as job was already canceled"); + } Ok(()) } @@ -162,16 +169,30 @@ impl Job for FileIdentifier { ) -> Result { let mut pending_running_tasks = FuturesUnordered::new(); - self.init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) - .await?; + match self + .init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + Ok(()) => { /* Everything is awesome! 
*/ } + Err(JobErrorOrJobCanceledError::JobError(e)) => { + return Err(e.into()); + } + Err(JobErrorOrJobCanceledError::JobCanceled(_)) => { + return Ok(self.cancel_job(&mut pending_running_tasks).await); + } + } while let Some(task) = pending_running_tasks.next().await { match task { Ok(TaskStatus::Done((task_id, TaskOutput::Out(out)))) => { - pending_running_tasks.extend( - self.process_task_output(task_id, out, &ctx, &dispatcher) - .await, - ); + let Ok(tasks) = self + .process_task_output(task_id, out, &ctx, &dispatcher) + .await + else { + return Ok(self.cancel_job(&mut pending_running_tasks).await); + }; + + pending_running_tasks.extend(tasks); } Ok(TaskStatus::Done((task_id, TaskOutput::Empty))) => { @@ -183,19 +204,17 @@ impl Job for FileIdentifier { } Ok(TaskStatus::Error(e)) => { - cancel_pending_tasks(&pending_running_tasks).await; + cancel_pending_tasks(&mut pending_running_tasks).await; return Err(e); } Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => { - cancel_pending_tasks(&pending_running_tasks).await; - - return Ok(ReturnStatus::Canceled); + return Ok(self.cancel_job(&mut pending_running_tasks).await); } Err(e) => { - cancel_pending_tasks(&pending_running_tasks).await; + cancel_pending_tasks(&mut pending_running_tasks).await; return Err(e.into()); } @@ -263,17 +282,18 @@ impl FileIdentifier { pending_running_tasks: &mut FuturesUnordered>, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Result<(), file_identifier::Error> { + ) -> Result<(), JobErrorOrJobCanceledError> { // if we don't have any pending task, then this is a fresh job if self.pending_tasks_on_resume.is_empty() { let db = ctx.db(); - let maybe_sub_iso_file_path = maybe_get_iso_file_path_from_sub_path( - self.location.id, - &self.sub_path, - &*self.location_path, - db, - ) - .await?; + let maybe_sub_iso_file_path = + maybe_get_iso_file_path_from_sub_path::( + self.location.id, + self.sub_path.as_ref(), + &*self.location_path, + db, + ) + .await?; let mut last_orphan_file_path_id = None; @@ -287,6 +307,8 @@ impl FileIdentifier { ) .map_err(file_identifier::Error::from)?; + ctx.progress([ProgressUpdate::phase(self.phase)]).await; + // First we dispatch some shallow priority tasks to quickly identify orphans in the location // root directory or in the desired sub-path self.dispatch_priority_identifier_tasks( @@ -309,8 +331,11 @@ impl FileIdentifier { ) .await?; + self.phase = Phase::IdentifyingFiles; + ctx.progress(vec![ ProgressUpdate::TaskCount(u64::from(self.metadata.total_identifier_tasks)), + ProgressUpdate::phase(self.phase), ProgressUpdate::Message(format!( "{} files to be identified", self.metadata.total_found_orphans @@ -353,7 +378,7 @@ impl FileIdentifier { any_task_output: Box, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Vec> { + ) -> Result>, JobCanceledError> { if any_task_output.is::() { return self .process_identifier_output( @@ -378,7 +403,7 @@ impl FileIdentifier { unreachable!("Unexpected task output type: "); } - vec![] + Ok(vec![]) } #[instrument( @@ -406,7 +431,7 @@ impl FileIdentifier { }: identifier::Output, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Vec> { + ) -> Result>, JobCanceledError> { self.metadata.mean_extract_metadata_time += extract_metadata_time; self.metadata.mean_save_db_time_on_identifier_tasks += save_db_time; self.metadata.total_identified_files += total_identified_files; @@ -448,13 +473,14 @@ impl FileIdentifier { // If we completed all identifier tasks, then we dispatch the object processor tasks if 
self.metadata.completed_identifier_tasks == self.metadata.total_identifier_tasks { + self.phase = Phase::ProcessingObjects; let tasks = dispatch_object_processor_tasks( self.file_paths_accumulator.drain(), ctx, dispatcher, false, ) - .await; + .await?; #[allow(clippy::cast_possible_truncation)] { @@ -469,9 +495,9 @@ impl FileIdentifier { ]) .await; - tasks + Ok(tasks) } else { - vec![] + Ok(vec![]) } } @@ -535,10 +561,12 @@ impl FileIdentifier { ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, pending_running_tasks: &FuturesUnordered>, - ) -> Result<(), file_identifier::Error> { + ) -> Result<(), JobErrorOrJobCanceledError> { let db = ctx.db(); loop { + let start = Instant::now(); + #[allow(clippy::cast_possible_wrap)] // SAFETY: we know that CHUNK_SIZE is a valid i64 let orphan_paths = db @@ -552,7 +580,8 @@ impl FileIdentifier { .take(CHUNK_SIZE as i64) .select(file_path_for_file_identifier::select()) .exec() - .await?; + .await + .map_err(file_identifier::Error::from)?; trace!(orphans_count = orphan_paths.len(), "Found orphan paths"); @@ -591,7 +620,14 @@ impl FileIdentifier { Arc::clone(ctx.db()), Arc::clone(ctx.sync()), )) - .await, + .await?, + ); + + debug!( + "Dispatched ({}/{}) identifier tasks, took: {:?}", + self.metadata.completed_identifier_tasks, + self.metadata.total_identifier_tasks, + start.elapsed(), ); } @@ -605,10 +641,12 @@ impl FileIdentifier { ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, pending_running_tasks: &FuturesUnordered>, - ) -> Result<(), file_identifier::Error> { + ) -> Result<(), JobErrorOrJobCanceledError> { let db = ctx.db(); loop { + let start = Instant::now(); + #[allow(clippy::cast_possible_wrap)] // SAFETY: we know that CHUNK_SIZE is a valid i64 let mut orphan_paths = db @@ -622,7 +660,8 @@ impl FileIdentifier { .take(CHUNK_SIZE as i64) .select(file_path_for_file_identifier::select()) .exec() - .await?; + .await + .map_err(file_identifier::Error::from)?; // No other orphans to identify, we can break the loop if orphan_paths.is_empty() { @@ -665,12 +704,33 @@ impl FileIdentifier { Arc::clone(ctx.db()), Arc::clone(ctx.sync()), )) - .await, + .await?, + ); + + debug!( + "Dispatched ({}/{}) identifier tasks, took: {:?}", + self.metadata.completed_identifier_tasks, + self.metadata.total_identifier_tasks, + start.elapsed(), ); } Ok(()) } + + async fn cancel_job( + &mut self, + pending_running_tasks: &mut FuturesUnordered>, + ) -> ReturnStatus { + cancel_pending_tasks(pending_running_tasks).await; + + ReturnStatus::Canceled( + JobReturn::builder() + .with_metadata(mem::take(&mut self.metadata)) + .with_non_critical_errors(mem::take(&mut self.errors)) + .build(), + ) + } } #[derive(Debug, Clone, Copy, Serialize, Deserialize)] diff --git a/core/crates/heavy-lifting/src/file_identifier/mod.rs b/core/crates/heavy-lifting/src/file_identifier/mod.rs index 3ff3d33831a7..c634e87e598a 100644 --- a/core/crates/heavy-lifting/src/file_identifier/mod.rs +++ b/core/crates/heavy-lifting/src/file_identifier/mod.rs @@ -190,15 +190,16 @@ fn orphan_path_filters_deep( ) } -async fn dispatch_object_processor_tasks( +async fn dispatch_object_processor_tasks( file_paths_by_cas_id: Iter, ctx: &impl OuterContext, - dispatcher: &impl TaskDispatcher, + dispatcher: &Dispatcher, with_priority: bool, -) -> Vec> +) -> Result>, Dispatcher::DispatchError> where Iter: IntoIterator, Vec)> + Send, Iter::IntoIter: Send, + Dispatcher: TaskDispatcher, { let mut current_batch = HashMap::<_, Vec<_>>::new(); let mut tasks = vec![]; @@ -215,7 +216,7 @@ where Arc::clone(ctx.sync()), 
with_priority, )) - .await, + .await?, ); } else { current_batch_size += objects_to_create_or_link.len(); @@ -237,7 +238,7 @@ where Arc::clone(ctx.sync()), with_priority, )) - .await, + .await?, ); current_batch_size = 0; @@ -254,11 +255,11 @@ where Arc::clone(ctx.sync()), with_priority, )) - .await, + .await?, ); } - tasks + Ok(tasks) } fn accumulate_file_paths_by_cas_id( diff --git a/core/crates/heavy-lifting/src/file_identifier/shallow.rs b/core/crates/heavy-lifting/src/file_identifier/shallow.rs index 8bccaef4fa5a..26bf2dcd6ca9 100644 --- a/core/crates/heavy-lifting/src/file_identifier/shallow.rs +++ b/core/crates/heavy-lifting/src/file_identifier/shallow.rs @@ -42,7 +42,6 @@ pub async fn shallow( dispatcher: &BaseTaskDispatcher, ctx: &impl OuterContext, ) -> Result, Error> { - let sub_path = sub_path.as_ref(); let db = ctx.db(); let location_path = maybe_missing(&location.path, "location.path") @@ -52,17 +51,20 @@ pub async fn shallow( let location = Arc::new(location); - let sub_iso_file_path = - maybe_get_iso_file_path_from_sub_path(location.id, &Some(sub_path), &*location_path, db) - .await - .map_err(file_identifier::Error::from)? - .map_or_else( - || { - IsolatedFilePathData::new(location.id, &*location_path, &*location_path, true) - .map_err(file_identifier::Error::from) - }, - Ok, - )?; + let sub_iso_file_path = maybe_get_iso_file_path_from_sub_path::( + location.id, + Some(sub_path.as_ref()), + &*location_path, + db, + ) + .await? + .map_or_else( + || { + IsolatedFilePathData::new(location.id, &*location_path, &*location_path, true) + .map_err(file_identifier::Error::from) + }, + Ok, + )?; let mut orphans_count = 0; let mut last_orphan_file_path_id = None; @@ -104,7 +106,8 @@ pub async fn shallow( Arc::clone(ctx.db()), Arc::clone(ctx.sync()), )) - .await, + .await + .expect("infallible"), ); } @@ -168,6 +171,7 @@ async fn process_tasks( true, ) .await + .expect("infallible") .into_iter() .map(CancelTaskOnDrop::new), ); diff --git a/core/crates/heavy-lifting/src/indexer/job.rs b/core/crates/heavy-lifting/src/indexer/job.rs index b4ff7a19c09e..05c30a74d1a7 100644 --- a/core/crates/heavy-lifting/src/indexer/job.rs +++ b/core/crates/heavy-lifting/src/indexer/job.rs @@ -6,7 +6,7 @@ use crate::{ }, report::ReportOutputMetadata, utils::cancel_pending_tasks, - SerializableJob, SerializedTasks, + JobCanceledError, JobErrorOrJobCanceledError, SerializableJob, SerializedTasks, }, utils::sub_path::get_full_path_from_sub_path, Error, LocationScanState, NonCriticalError, OuterContext, @@ -90,7 +90,7 @@ impl Job for Indexer { ) -> Result<(), Error> { let location_id = self.location.id; - self.pending_tasks_on_resume = dispatcher + if let Ok(tasks) = dispatcher .dispatch_many_boxed( rmp_serde::from_slice::)>>(&serialized_tasks) .map_err(indexer::Error::from)? @@ -109,7 +109,6 @@ impl Job for Indexer { db: Arc::clone(ctx.db()), }, iso_file_path_factory.clone(), - dispatcher.clone(), ), ) .await @@ -135,7 +134,12 @@ impl Job for Indexer { .await .map_err(indexer::Error::from)?, ) - .await; + .await + { + self.pending_tasks_on_resume = tasks; + } else { + warn!("Failed to dispatch tasks to resume as job was already canceled"); + } Ok(()) } @@ -157,8 +161,18 @@ impl Job for Indexer { ) -> Result { let mut pending_running_tasks = FuturesUnordered::new(); - self.init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) - .await?; + match self + .init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + Ok(()) => { /* Everything is awesome! 
*/ } + Err(JobErrorOrJobCanceledError::JobError(e)) => { + return Err(e.into()); + } + Err(JobErrorOrJobCanceledError::JobCanceled(_)) => { + return Ok(self.cancel_job(&mut pending_running_tasks).await); + } + } if let Some(res) = self .process_handles(&mut pending_running_tasks, &ctx, &dispatcher) @@ -304,14 +318,14 @@ impl Indexer { any_task_output: Box, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Result>, indexer::Error> { + ) -> Result>, JobErrorOrJobCanceledError> { self.metadata.completed_tasks += 1; - if any_task_output.is::() { + if any_task_output.is::>() { return self .process_walk_output( *any_task_output - .downcast::() + .downcast::>() .expect("just checked"), ctx, dispatcher, @@ -348,7 +362,7 @@ impl Indexer { to_remove_count = to_remove.len(), accepted_ancestors_count = accepted_ancestors.len(), directory_iso_file_path = %directory_iso_file_path.as_ref().display(), - more_walker_tasks_count = handles.len(), + more_walker_tasks_count = keep_walking_tasks.len(), %total_size, ?scan_time, ) @@ -363,21 +377,22 @@ impl Indexer { errors, directory_iso_file_path, total_size, - mut handles, + keep_walking_tasks, scan_time, .. - }: walker::Output, + }: walker::Output, ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Result>, indexer::Error> { + ) -> Result>, JobErrorOrJobCanceledError> { self.metadata.mean_scan_read_time += scan_time; - // Initially the handles vec only have walker tasks, but we will add saver and updater tasks later #[allow(clippy::cast_possible_truncation)] - // SAFETY: we know that `handles.len()` is a valid u32 as we wouldn't dispatch more than `u32::MAX` tasks + // SAFETY: we know that `keep_walking_tasks.len()` is a valid u32 as we wouldn't dispatch more than `u32::MAX` tasks { - self.metadata.total_walk_tasks += handles.len() as u32; + self.metadata.total_walk_tasks += keep_walking_tasks.len() as u32; } + let mut handles = dispatcher.dispatch_many(keep_walking_tasks).await?; + let (to_create_count, to_update_count) = (to_create.len(), to_update.len()); *self @@ -442,8 +457,8 @@ impl Indexer { self.metadata.total_tasks ); - handles.extend(dispatcher.dispatch_many(save_tasks).await); - handles.extend(dispatcher.dispatch_many(update_tasks).await); + handles.extend(dispatcher.dispatch_many(save_tasks).await?); + handles.extend(dispatcher.dispatch_many(update_tasks).await?); self.metadata.total_tasks += handles.len() as u64; @@ -521,11 +536,14 @@ impl Indexer { .await { Ok(more_handles) => more_handles, - Err(e) => { - cancel_pending_tasks(&*pending_running_tasks).await; + Err(JobErrorOrJobCanceledError::JobError(e)) => { + cancel_pending_tasks(pending_running_tasks).await; return Some(Err(e.into())); } + Err(JobErrorOrJobCanceledError::JobCanceled(_)) => { + return Some(Ok(self.cancel_job(pending_running_tasks).await)); + } }; pending_running_tasks.extend(more_handles); @@ -540,19 +558,17 @@ impl Indexer { } Ok(TaskStatus::Error(e)) => { - cancel_pending_tasks(&*pending_running_tasks).await; + cancel_pending_tasks(pending_running_tasks).await; return Some(Err(e)); } Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => { - cancel_pending_tasks(&*pending_running_tasks).await; - - return Some(Ok(ReturnStatus::Canceled)); + return Some(Ok(self.cancel_job(pending_running_tasks).await)); } Err(e) => { - cancel_pending_tasks(&*pending_running_tasks).await; + cancel_pending_tasks(pending_running_tasks).await; return Some(Err(e.into())); } @@ -567,13 +583,13 @@ impl Indexer { pending_running_tasks: &mut FuturesUnordered>, ctx: &impl 
JobContext, dispatcher: &JobTaskDispatcher, - ) -> Result<(), indexer::Error> { + ) -> Result<(), JobErrorOrJobCanceledError> { // if we don't have any pending task, then this is a fresh job let updates = if self.pending_tasks_on_resume.is_empty() { let walker_root_path = Arc::new( - get_full_path_from_sub_path( + get_full_path_from_sub_path::( self.location.id, - &self.sub_path, + self.sub_path.as_ref(), &*self.iso_file_path_factory.location_path, ctx.db(), ) @@ -591,9 +607,8 @@ impl Indexer { location_id: self.location.id, db: Arc::clone(ctx.db()), }, - dispatcher.clone(), )?) - .await, + .await?, ); self.metadata.total_tasks = 1; @@ -634,20 +649,23 @@ impl Indexer { "last save task must be less than BATCH_SIZE paths" ); + self.metadata.total_tasks += 1; self.metadata.total_paths += self.to_create_buffer.len() as u64; self.metadata.total_save_tasks += 1; - pending_running_tasks.push( - dispatcher - .dispatch(tasks::Saver::new_deep( - self.location.id, - self.location.pub_id.clone(), - self.to_create_buffer.drain(..).collect(), - Arc::clone(ctx.db()), - Arc::clone(ctx.sync()), - )) - .await, - ); + if let Err(JobCanceledError(_)) = dispatcher + .dispatch(tasks::Saver::new_deep( + self.location.id, + self.location.pub_id.clone(), + self.to_create_buffer.drain(..).collect(), + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )) + .await + .map(|task_handle| pending_running_tasks.push(task_handle)) + { + return Some(Ok(self.cancel_job(pending_running_tasks).await)); + } } if !self.to_update_buffer.is_empty() { @@ -656,18 +674,21 @@ impl Indexer { "last update task must be less than BATCH_SIZE paths" ); + self.metadata.total_tasks += 1; self.metadata.total_updated_paths += self.to_update_buffer.len() as u64; self.metadata.total_update_tasks += 1; - pending_running_tasks.push( - dispatcher - .dispatch(tasks::Updater::new_deep( - self.to_update_buffer.drain(..).collect(), - Arc::clone(ctx.db()), - Arc::clone(ctx.sync()), - )) - .await, - ); + if let Err(JobCanceledError(_)) = dispatcher + .dispatch(tasks::Updater::new_deep( + self.to_update_buffer.drain(..).collect(), + Arc::clone(ctx.db()), + Arc::clone(ctx.sync()), + )) + .await + .map(|task_handle| pending_running_tasks.push(task_handle)) + { + return Some(Ok(self.cancel_job(pending_running_tasks).await)); + } } self.process_handles(pending_running_tasks, ctx, dispatcher) @@ -694,6 +715,7 @@ impl Indexer { .into_iter() .map(|chunk| { let chunked_saves = chunk.collect::>(); + self.metadata.total_paths += chunked_saves.len() as u64; self.metadata.total_save_tasks += 1; @@ -707,7 +729,15 @@ impl Indexer { }) .collect::>(); - pending_running_tasks.extend(dispatcher.dispatch_many(save_tasks).await); + self.metadata.total_tasks += save_tasks.len() as u64; + + if let Err(JobCanceledError(_)) = dispatcher + .dispatch_many(save_tasks) + .await + .map(|task_handles| pending_running_tasks.extend(task_handles)) + { + return Some(Ok(self.cancel_job(pending_running_tasks).await)); + }; self.process_handles(pending_running_tasks, ctx, dispatcher) .await @@ -730,6 +760,7 @@ impl Indexer { .into_iter() .map(|chunk| { let chunked_saves = chunk.collect::>(); + self.metadata.total_paths += chunked_saves.len() as u64; self.metadata.total_save_tasks += 1; @@ -749,6 +780,7 @@ impl Indexer { .into_iter() .map(|chunk| { let chunked_updates = chunk.collect::>(); + self.metadata.total_updated_paths += chunked_updates.len() as u64; self.metadata.total_update_tasks += 1; @@ -821,6 +853,20 @@ impl Indexer { (save_tasks, update_tasks) } } + + async fn cancel_job( + &mut 
self, + pending_running_tasks: &mut FuturesUnordered>, + ) -> ReturnStatus { + cancel_pending_tasks(pending_running_tasks).await; + + ReturnStatus::Canceled( + JobReturn::builder() + .with_metadata(mem::take(&mut self.metadata)) + .with_non_critical_errors(mem::take(&mut self.errors)) + .build(), + ) + } } #[derive(Debug, Clone, Serialize, Deserialize, Default)] @@ -949,16 +995,12 @@ impl SerializableJob for Indexer { &tasks_for_shutdown .into_iter() .map(|task| async move { - if task - .is::>( - ) { - task - .downcast::>( - ) - .expect("just checked") - .serialize() - .await - .map(|bytes| (TaskKind::Walk, bytes)) + if task.is::>() { + task.downcast::>() + .expect("just checked") + .serialize() + .await + .map(|bytes| (TaskKind::Walk, bytes)) } else if task.is::() { task.downcast::() .expect("just checked") diff --git a/core/crates/heavy-lifting/src/indexer/mod.rs b/core/crates/heavy-lifting/src/indexer/mod.rs index f682b45595a0..0027fe51f92f 100644 --- a/core/crates/heavy-lifting/src/indexer/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/mod.rs @@ -339,7 +339,7 @@ pub async fn reverse_update_directories_sizes( ), )) } else { - warn!("Got a missing ancestor for a file_path in the database, maybe we have a corruption"); + warn!("Got a missing ancestor for a file_path in the database, ignoring..."); None } }) diff --git a/core/crates/heavy-lifting/src/indexer/shallow.rs b/core/crates/heavy-lifting/src/indexer/shallow.rs index 980318377dc2..4ee5daf15cca 100644 --- a/core/crates/heavy-lifting/src/indexer/shallow.rs +++ b/core/crates/heavy-lifting/src/indexer/shallow.rs @@ -44,7 +44,6 @@ pub async fn shallow( dispatcher: &BaseTaskDispatcher, ctx: &impl OuterContext, ) -> Result, Error> { - let sub_path = sub_path.as_ref(); let db = ctx.db(); let sync = ctx.sync(); @@ -54,9 +53,13 @@ pub async fn shallow( .map_err(indexer::Error::from)?; let to_walk_path = Arc::new( - get_full_path_from_sub_path(location.id, &Some(sub_path), &*location_path, db) - .await - .map_err(indexer::Error::from)?, + get_full_path_from_sub_path::( + location.id, + Some(sub_path.as_ref()), + &*location_path, + db, + ) + .await?, ); let Some(walker::Output { @@ -141,7 +144,7 @@ async fn walk( to_walk_path: Arc, db: Arc, dispatcher: &BaseTaskDispatcher, -) -> Result, Error> { +) -> Result>, Error> { match dispatcher .dispatch(tasks::Walker::new_shallow( ToWalkEntry::from(&*to_walk_path), @@ -163,11 +166,12 @@ async fn walk( }, )?) .await + .expect("infallible") .await? 
{ sd_task_system::TaskStatus::Done((_, TaskOutput::Out(data))) => Ok(Some( *data - .downcast::() + .downcast::>() .expect("we just dispatched this task"), )), sd_task_system::TaskStatus::Done((_, TaskOutput::Empty)) => { @@ -237,6 +241,7 @@ async fn save_and_update( for task_status in dispatcher .dispatch_many_boxed(save_and_update_tasks) .await + .expect("infallible") .into_iter() .map(CancelTaskOnDrop::new) .collect::>() diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs index 66e898d0e687..188e45005ba8 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/mod.rs @@ -15,8 +15,7 @@ use sd_core_prisma_helpers::{file_path_pub_and_cas_ids, file_path_walker}; use sd_prisma::prisma::file_path; use sd_task_system::{ - check_interruption, BaseTaskDispatcher, ExecStatus, Interrupter, IntoAnyTaskOutput, Task, - TaskDispatcher, TaskHandle, TaskId, + check_interruption, ExecStatus, Interrupter, IntoAnyTaskOutput, Task, TaskId, }; use sd_utils::{db::inode_from_db, error::FileIOError}; @@ -70,11 +69,10 @@ pub trait WalkerDBProxy: Clone + Send + Sync + fmt::Debug + 'static { } #[derive(Debug)] -pub struct Walker> +pub struct Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, - Dispatcher: TaskDispatcher, { // Task control id: TaskId, @@ -92,7 +90,6 @@ where // Dependencies iso_file_path_factory: IsoPathFactory, db_proxy: DBProxy, - maybe_dispatcher: Option, // Non critical errors that happened during the task execution errors: Vec, @@ -103,7 +100,11 @@ where /// [`Walker`] Task output #[derive(Debug)] -pub struct Output { +pub struct Output +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, +{ /// Entries found in the file system that need to be created in database pub to_create: Vec, /// Entries found in the file system that need to be updated in database @@ -121,18 +122,16 @@ pub struct Output { /// Total size of the directory that was indexed pub total_size: u64, /// Task handles that were dispatched to run `WalkDir` tasks for inner directories - pub handles: Vec>, + pub keep_walking_tasks: Vec>, /// Time spent walking through the received directory pub scan_time: Duration, } #[async_trait::async_trait] -impl Task - for Walker +impl Task for Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, - Dispatcher: TaskDispatcher, { fn id(&self) -> TaskId { self.id @@ -155,6 +154,7 @@ where )] #[allow(clippy::blocks_in_conditions)] // Due to `err` on `instrument` macro above async fn run(&mut self, interrupter: &Interrupter) -> Result { + let is_shallow = self.is_shallow; let Self { root, entry: ToWalkEntry { @@ -166,7 +166,6 @@ where indexer_ruler, db_proxy, stage, - maybe_dispatcher, errors, scan_time, .. 
@@ -181,7 +180,7 @@ where non_indexed_paths, accepted_ancestors, total_size, - handles, + keep_walking_tasks, ) = loop { match stage { WalkerStage::Start => { @@ -276,7 +275,7 @@ where paths_metadatas_and_acceptance, } => { trace!("Processing rules results"); - let mut maybe_to_keep_walking = maybe_dispatcher.is_some().then(Vec::new); + let mut maybe_to_keep_walking = (!is_shallow).then(Vec::new); let (accepted_paths, accepted_ancestors, rejected_paths) = process_rules_results( root, @@ -284,10 +283,11 @@ where *parent_dir_accepted_by_its_children, paths_metadatas_and_acceptance, &mut maybe_to_keep_walking, - self.is_shallow, + is_shallow, errors, ) .await; + trace!( total_accepted_paths = accepted_paths.len(), total_accepted_ancestors = accepted_ancestors.len(), @@ -354,16 +354,14 @@ where "Finished segregating creates and updates!" ); - let handles = keep_walking( + let keep_walking_tasks = keep_walking( root, indexer_ruler, iso_file_path_factory, db_proxy, maybe_to_keep_walking.as_mut(), - maybe_dispatcher.as_ref(), errors, - ) - .await; + ); break ( to_create, @@ -372,7 +370,7 @@ where mem::take(non_indexed_paths), mem::take(accepted_ancestors), total_size, - handles, + keep_walking_tasks, ); } } @@ -391,7 +389,7 @@ where errors: mem::take(errors), directory_iso_file_path: mem::take(entry_iso_file_path), total_size, - handles, + keep_walking_tasks, scan_time: *scan_time, } .into_output(), @@ -431,11 +429,10 @@ enum WalkerStage { }, } -impl Walker +impl Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, - Dispatcher: TaskDispatcher, { pub fn new_deep( entry: impl Into + Send, @@ -443,7 +440,6 @@ where indexer_ruler: IndexerRuler, iso_file_path_factory: IsoPathFactory, db_proxy: DBProxy, - dispatcher: Dispatcher, ) -> Result { let entry = entry.into(); Ok(Self { @@ -455,7 +451,6 @@ where db_proxy, stage: WalkerStage::Start, entry, - maybe_dispatcher: Some(dispatcher), is_shallow: false, errors: Vec::new(), scan_time: Duration::ZERO, @@ -463,7 +458,7 @@ where } } -impl Walker> +impl Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, @@ -485,7 +480,6 @@ where db_proxy, stage: WalkerStage::Start, entry, - maybe_dispatcher: None, is_shallow: true, errors: Vec::new(), scan_time: Duration::ZERO, @@ -585,39 +579,38 @@ async fn segregate_creates_and_updates( } } -async fn keep_walking( +fn keep_walking( root: &Arc, indexer_ruler: &IndexerRuler, - iso_file_path_factory: &impl IsoFilePathFactory, - db_proxy: &impl WalkerDBProxy, + iso_file_path_factory: &IsoPathFactory, + db_proxy: &DBProxy, maybe_to_keep_walking: Option<&mut Vec>, - dispatcher: Option<&impl TaskDispatcher>, errors: &mut Vec, -) -> Vec> { - if let (Some(dispatcher), Some(to_keep_walking)) = (dispatcher, maybe_to_keep_walking) { - dispatcher - .dispatch_many( - to_keep_walking - .drain(..) - .map(|entry| { - Walker::new_deep( - entry, - Arc::clone(root), - indexer_ruler.clone(), - iso_file_path_factory.clone(), - db_proxy.clone(), - dispatcher.clone(), - ) - .map_err(|e| { - indexer::NonCriticalIndexerError::DispatchKeepWalking(e.to_string()) - }) +) -> Vec> +where + DBProxy: WalkerDBProxy, + IsoPathFactory: IsoFilePathFactory, +{ + maybe_to_keep_walking + .map(|to_keep_walking| { + to_keep_walking + .drain(..) 
+ .map(|entry| { + Walker::new_deep( + entry, + Arc::clone(root), + indexer_ruler.clone(), + iso_file_path_factory.clone(), + db_proxy.clone(), + ) + .map_err(|e| { + indexer::NonCriticalIndexerError::DispatchKeepWalking(e.to_string()) }) - .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()), - ) - .await - } else { - Vec::new() - } + }) + .filter_map(|res| res.map_err(|e| errors.push(e.into())).ok()) + .collect() + }) + .unwrap_or_default() } async fn collect_metadata( @@ -887,7 +880,6 @@ mod tests { root_path: Arc::new(root_path.to_path_buf()), }, DummyDBProxy, - system.get_dispatcher(), ) .unwrap(), ) @@ -912,18 +904,18 @@ mod tests { to_create, accepted_ancestors, errors, - handles, + keep_walking_tasks, .. - } = *output.downcast::().unwrap(); + } = *output + .downcast::>() + .unwrap(); assert!(errors.is_empty(), "errors: {errors:#?}"); actual_set.extend(to_create); ancestors.extend(accepted_ancestors); - for handle in handles { - group.push(handle); - } + group.extend(system.dispatch_many(keep_walking_tasks).await); } for actual in &actual_set { diff --git a/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs b/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs index d5fdc72b8f3f..2dd66fc50bd3 100644 --- a/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs +++ b/core/crates/heavy-lifting/src/indexer/tasks/walker/save_state.rs @@ -11,7 +11,7 @@ use std::{ time::Duration, }; -use sd_task_system::{SerializableTask, TaskDispatcher, TaskId}; +use sd_task_system::{SerializableTask, TaskId}; use serde::{Deserialize, Serialize}; use super::{ @@ -154,16 +154,14 @@ impl From for WalkerStage { } } -impl SerializableTask - for Walker +impl SerializableTask for Walker where DBProxy: WalkerDBProxy, IsoPathFactory: IsoFilePathFactory, - Dispatcher: TaskDispatcher, { type SerializeError = rmp_serde::encode::Error; type DeserializeError = rmp_serde::decode::Error; - type DeserializeCtx = (IndexerRuler, DBProxy, IsoPathFactory, Dispatcher); + type DeserializeCtx = (IndexerRuler, DBProxy, IsoPathFactory); async fn serialize(self) -> Result, Self::SerializeError> { let Self { @@ -191,7 +189,7 @@ where async fn deserialize( data: &[u8], - (indexer_ruler, db_proxy, iso_file_path_factory, dispatcher): Self::DeserializeCtx, + (indexer_ruler, db_proxy, iso_file_path_factory): Self::DeserializeCtx, ) -> Result { rmp_serde::from_slice(data).map( |WalkDirSaveState { @@ -212,7 +210,6 @@ where iso_file_path_factory, db_proxy, stage: stage.into(), - maybe_dispatcher: (!is_shallow).then_some(dispatcher), errors, scan_time, is_shallow, diff --git a/core/crates/heavy-lifting/src/job_system/error.rs b/core/crates/heavy-lifting/src/job_system/error.rs index af212ef4e106..e22ca06e53ab 100644 --- a/core/crates/heavy-lifting/src/job_system/error.rs +++ b/core/crates/heavy-lifting/src/job_system/error.rs @@ -1,5 +1,3 @@ -use crate::Error; - use sd_utils::error::FileIOError; use prisma_client_rust::QueryError; @@ -17,9 +15,6 @@ pub enum JobSystemError { already_running_id: JobId, }, - #[error("job canceled: ")] - Canceled(JobId), - #[error("failed to load job reports from database to resume jobs: {0}")] LoadReportsForResume(#[from] QueryError), @@ -34,9 +29,6 @@ pub enum JobSystemError { #[error(transparent)] Report(#[from] ReportError), - - #[error(transparent)] - Processing(#[from] Error), } impl From for rspc::Error { @@ -45,17 +37,26 @@ impl From for rspc::Error { JobSystemError::NotFound(_) => { Self::with_cause(rspc::ErrorCode::NotFound, e.to_string(), e) } + 
JobSystemError::AlreadyRunning { .. } => { Self::with_cause(rspc::ErrorCode::Conflict, e.to_string(), e) } - JobSystemError::Canceled(_) => { - Self::with_cause(rspc::ErrorCode::ClientClosedRequest, e.to_string(), e) - } - JobSystemError::Processing(e) => e.into(), JobSystemError::Report(e) => e.into(), _ => Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e), } } } + +#[derive(thiserror::Error, Debug)] +#[error("job canceled: ")] +pub struct JobCanceledError(pub JobId); + +#[derive(Debug, thiserror::Error)] +pub enum JobErrorOrJobCanceledError> { + #[error(transparent)] + JobError(#[from] JobError), + #[error(transparent)] + JobCanceled(#[from] JobCanceledError), +} diff --git a/core/crates/heavy-lifting/src/job_system/job.rs b/core/crates/heavy-lifting/src/job_system/job.rs index 65b4935eb1ab..924be586b67e 100644 --- a/core/crates/heavy-lifting/src/job_system/job.rs +++ b/core/crates/heavy-lifting/src/job_system/job.rs @@ -9,6 +9,7 @@ use sd_task_system::{ use std::{ collections::{hash_map::DefaultHasher, VecDeque}, + fmt, hash::{Hash, Hasher}, marker::PhantomData, ops::{Deref, DerefMut}, @@ -30,13 +31,14 @@ use specta::Type; use strum::{Display, EnumString}; use tokio::{ spawn, - sync::{watch, Mutex}, + sync::{oneshot, watch, Mutex}, time::Instant, }; -use tracing::{debug, error, info, instrument, trace, warn}; +use tracing::{debug, error, info, instrument, trace, warn, Instrument, Level}; use uuid::Uuid; use super::{ + error::JobCanceledError, report::{ Report, ReportBuilder, ReportInputMetadata, ReportMetadata, ReportOutputMetadata, Status, }, @@ -59,11 +61,20 @@ pub enum JobName { FileValidator, } -#[derive(Debug)] pub enum ReturnStatus { Completed(JobReturn), Shutdown(Result>, rmp_serde::encode::Error>), - Canceled, + Canceled(JobReturn), +} + +impl fmt::Debug for ReturnStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Completed(job_return) => f.debug_tuple("Completed").field(job_return).finish(), + Self::Shutdown(_) => f.write_str("Shutdown()"), + Self::Canceled(job_return) => f.debug_tuple("Canceled").field(job_return).finish(), + } + } } pub enum ProgressUpdate { @@ -95,11 +106,14 @@ pub trait OuterContext: Send + Sync + Clone + 'static { pub trait JobContext: OuterContext { fn new(report: Report, ctx: OuterCtx) -> Self; - fn progress(&self, updates: Vec) -> impl Future + Send; + fn progress( + &self, + updates: impl IntoIterator + Send, + ) -> impl Future + Send; fn progress_msg(&self, msg: impl Into) -> impl Future + Send { let msg = msg.into(); async move { - self.progress(vec![ProgressUpdate::Message(msg)]).await; + self.progress([ProgressUpdate::Message(msg)]).await; } } fn report(&self) -> impl Future + Send> + Send; @@ -381,28 +395,75 @@ pub struct JobHandle> { pub(crate) id: JobId, pub(crate) start_time: Instant, pub(crate) run_time: Duration, + pub(crate) is_running: bool, pub(crate) next_jobs: VecDeque>>, pub(crate) ctx: JobCtx, - pub(crate) commands_tx: chan::Sender, + pub(crate) commands_tx: chan::Sender<(Command, oneshot::Sender<()>)>, } impl> JobHandle { - #[instrument(skip(self), fields(id = %self.id), err)] - pub async fn send_command(&mut self, command: Command) -> Result<(), JobSystemError> { + #[instrument(skip(self, outer_ack_tx), fields(job_id = %self.id))] + pub async fn send_command( + &mut self, + command: Command, + outer_ack_tx: oneshot::Sender>, + ) { trace!("JobHandle sending command"); - if self.commands_tx.send(command).await.is_err() { + + let (ack_tx, ack_rx) = oneshot::channel(); + + 
let res = if self.commands_tx.send((command, ack_tx)).await.is_err() { warn!("Tried to send command to a job that was already completed"); Ok(()) } else { - self.command_children(command).await + ack_rx + .await + .expect("inner ack channel closed before sending response to handle a job command"); + + match self.execute_command(command).await { + Ok(()) => self.command_children(command).await, + Err(e) => Err(e), + } + }; + + if res.is_ok() { + match command { + Command::Pause | Command::Cancel => self.is_running = false, + Command::Resume => self.is_running = true, + } } + + outer_ack_tx + .send(res) + .unwrap_or_else(|_| panic!("ack channel closed before sending {command:?} response")); + } + + #[instrument(skip_all, err)] + async fn execute_command(&mut self, command: Command) -> Result<(), JobSystemError> { + let (new_status, completed_at) = match command { + Command::Pause => (Status::Paused, None), + Command::Resume => (Status::Running, None), + Command::Cancel => (Status::Canceled, Some(Utc::now())), + }; + + { + let mut report = self.ctx.report_mut().await; + + report.status = new_status; + report.completed_at = completed_at; + + report.update(self.ctx.db()).await?; + } + + Ok(()) } + #[instrument(skip_all, err)] async fn command_children(&mut self, command: Command) -> Result<(), JobSystemError> { let (new_status, completed_at) = match command { Command::Pause => (Status::Paused, None), - Command::Resume => return Ok(()), + Command::Resume => (Status::Queued, None), Command::Cancel => (Status::Canceled, Some(Utc::now())), }; @@ -427,7 +488,12 @@ impl> JobHandle, @@ -435,35 +501,41 @@ impl> JobHandle> JobHandle> JobHandle Result<(), JobSystemError> { + pub async fn cancel_job( + &mut self, + JobReturn { + data, + metadata, + non_critical_errors, + }: JobReturn, + ) -> Result { trace!("JobHandle canceling job"); let db = self.ctx.db(); - { + let output = { let mut report = self.ctx.report_mut().await; - info!( + debug!( job_name = %report.name, "Job canceled, we will cancel all children jobs", ); report.status = Status::Canceled; + report.non_critical_errors.extend(non_critical_errors); + report.metadata.extend(metadata.into_iter().map(Into::into)); report.completed_at = Some(Utc::now()); report.update(db).await?; - } + + JobOutput { + id: report.id, + status: report.status, + job_name: report.name, + data, + metadata: report.metadata.clone(), + non_critical_errors: report.non_critical_errors.clone(), + } + }; trace!("JobHandle sending cancel command to children"); - self.command_children(Command::Cancel).await + self.command_children(Command::Cancel).await?; + + Ok(output) } } @@ -691,6 +788,7 @@ where JobHandle { id: self.id, start_time: Instant::now(), + is_running: true, run_time: Duration::ZERO, next_jobs: self.next_jobs, ctx, @@ -730,23 +828,24 @@ where JobHandle { id: self.id, - next_jobs: self.next_jobs, start_time: Instant::now(), + is_running: true, run_time: self.run_time, + next_jobs: self.next_jobs, ctx, commands_tx, } } } -#[instrument(skip_all, fields(id = %id, name = %J::NAME))] +#[instrument(name = "job_executor", skip_all, fields(%job_id, name = %J::NAME))] async fn to_spawn_job( - id: JobId, + job_id: JobId, mut job: J, ctx: JobCtx, existing_tasks: Option, base_dispatcher: BaseTaskDispatcher, - commands_rx: chan::Receiver, + commands_rx: chan::Receiver<(Command, oneshot::Sender<()>)>, done_tx: chan::Sender<(JobId, Result)>, ) where OuterCtx: OuterContext, @@ -754,7 +853,7 @@ async fn to_spawn_job( J: Job, { enum StreamMessage { - Commands(Command), + 
Commands((Command, oneshot::Sender<()>)), NewRemoteController(TaskRemoteController), Done(Result), } @@ -764,12 +863,12 @@ async fn to_spawn_job( let (running_state_tx, running_state_rx) = watch::channel(JobRunningState::Running); let (dispatcher, remote_controllers_rx) = - JobTaskDispatcher::new(base_dispatcher, running_state_rx); + JobTaskDispatcher::new(job_id, base_dispatcher, running_state_rx); if let Some(existing_tasks) = existing_tasks { if let Err(e) = job.resume_tasks(&dispatcher, &ctx, existing_tasks).await { done_tx - .send((id, Err(e))) + .send((job_id, Err(e))) .await .expect("jobs done tx closed on error at resume_tasks"); @@ -777,25 +876,53 @@ async fn to_spawn_job( } } + let (tx, rx) = chan::bounded(1); + + spawn( + async move { + tx.send(job.run::(dispatcher, ctx).await) + .await + .expect("job run channel closed"); + } + .in_current_span(), + ); + + let commands_rx_to_close = commands_rx.clone(); + let mut msgs_stream = pin!(( commands_rx.map(StreamMessage::Commands), - remote_controllers_rx.map(StreamMessage::NewRemoteController), - stream::once(job.run::(dispatcher, ctx)).map(StreamMessage::Done), + remote_controllers_rx + .clone() + .map(StreamMessage::NewRemoteController), + stream::once({ + let rx = rx.clone(); + async move { rx.recv().await.expect("job run rx closed") } + }) + .map(StreamMessage::Done), ) .merge()); while let Some(msg) = msgs_stream.next().await { match msg { StreamMessage::NewRemoteController(remote_controller) => { + trace!("new remote controller received"); remote_controllers.push(remote_controller); + trace!("added new remote controller"); } - StreamMessage::Commands(command) => { + StreamMessage::Commands((command, ack_tx)) => { + // Add any possible pending remote controllers to the list + while let Ok(remote_controller) = remote_controllers_rx.try_recv() { + remote_controllers.push(remote_controller); + } + remote_controllers.retain(|controller| !controller.is_done()); match command { Command::Pause => { trace!("Pausing job"); running_state_tx.send_modify(|state| *state = JobRunningState::Paused); + trace!(tasks_count = remote_controllers.len(), "pausing tasks"); + remote_controllers .iter() .map(TaskRemoteController::pause) @@ -807,13 +934,18 @@ async fn to_spawn_job( if let Err(e) = res { assert!(matches!(e, TaskSystemError::TaskNotFound(_))); - warn!("Tried to pause a task that was already completed"); + trace!("Tried to pause a task that was already completed"); } }); + + ack_tx.send(()).expect("ack channel closed"); + trace!("paused job"); } + Command::Resume => { trace!("Resuming job"); running_state_tx.send_modify(|state| *state = JobRunningState::Running); + trace!(tasks_count = remote_controllers.len(), "resuming tasks"); remote_controllers .iter() @@ -826,47 +958,85 @@ async fn to_spawn_job( if let Err(e) = res { assert!(matches!(e, TaskSystemError::TaskNotFound(_))); - warn!("Tried to resume a task that was already completed"); + trace!("Tried to resume a task that was already completed"); } }); + + ack_tx.send(()).expect("ack channel closed"); + trace!("resumed job"); } + Command::Cancel => { trace!("Canceling job"); + running_state_tx.send_modify(|state| *state = JobRunningState::Canceled); + trace!(tasks_count = remote_controllers.len(), "canceling tasks"); + remote_controllers .iter() .map(TaskRemoteController::cancel) .collect::>() .join() - .await; - - return done_tx - .send((id, Ok(ReturnStatus::Canceled))) .await - .expect("jobs done tx closed"); + .into_iter() + .for_each(|res| { + if let Err(e) = res { + 
assert!(matches!(e, TaskSystemError::TaskNotFound(_))); + + trace!("Tried to cancel a task that was already completed"); + } + }); + + trace!("canceled job"); + + commands_rx_to_close.close(); + trace!("Finishing job"); + let res = rx.recv().await.expect("job run rx closed"); + ack_tx.send(()).expect("ack channel closed"); + trace!("Job cancellation done"); + + return finish_job(job_id, res, remote_controllers, done_tx).await; } } } StreamMessage::Done(res) => { trace!("Job done"); - #[cfg(debug_assertions)] - { - // Just a sanity check to make sure we don't have any pending tasks left - remote_controllers.retain(|controller| !controller.is_done()); - assert!(remote_controllers.is_empty()); - // Using #[cfg(debug_assertions)] to don't pay this retain cost in release builds - } - - return done_tx.send((id, res)).await.expect("jobs done tx closed"); + commands_rx_to_close.close(); + return finish_job(job_id, res, remote_controllers, done_tx).await; } } } } +#[instrument(skip(remote_controllers, done_tx))] +async fn finish_job( + job_id: JobId, + job_result: Result, + mut remote_controllers: Vec, + done_tx: chan::Sender<(JobId, Result)>, +) { + trace!("Checking remove controllers"); + #[cfg(debug_assertions)] + { + // Just a sanity check to make sure we don't have any pending tasks left + remote_controllers.retain(|controller| !controller.is_done()); + assert!(remote_controllers.is_empty()); + // Using #[cfg(debug_assertions)] to don't pay this retain cost in release builds + } + + trace!("Sending job done message"); + + done_tx + .send((job_id, job_result)) + .await + .expect("jobs done tx closed"); +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum JobRunningState { Running, Paused, + Canceled, } impl Default for JobRunningState { @@ -877,32 +1047,56 @@ impl Default for JobRunningState { #[derive(Debug, Clone)] pub struct JobTaskDispatcher { + job_id: JobId, dispatcher: BaseTaskDispatcher, remote_controllers_tx: chan::Sender, running_state: Arc>>, } impl TaskDispatcher for JobTaskDispatcher { - async fn dispatch_boxed(&self, boxed_task: Box>) -> TaskHandle { - self.wait_for_dispatch_approval().await; + type DispatchError = JobCanceledError; - let handle = self.dispatcher.dispatch_boxed(boxed_task).await; + async fn dispatch_boxed( + &self, + boxed_task: Box>, + ) -> Result, Self::DispatchError> { + if matches!( + self.wait_for_dispatch_approval().await, + DispatchApproval::Canceled + ) { + return Err(JobCanceledError(self.job_id)); + } + + let handle = self + .dispatcher + .dispatch_boxed(boxed_task) + .await + .expect("infallible"); self.remote_controllers_tx .send(handle.remote_controller()) .await .expect("remote controllers tx closed"); - handle + Ok(handle) } async fn dispatch_many_boxed( &self, boxed_tasks: impl IntoIterator>> + Send, - ) -> Vec> { - self.wait_for_dispatch_approval().await; + ) -> Result>, Self::DispatchError> { + if matches!( + self.wait_for_dispatch_approval().await, + DispatchApproval::Canceled + ) { + return Err(JobCanceledError(self.job_id)); + } - let handles = self.dispatcher.dispatch_many_boxed(boxed_tasks).await; + let handles = self + .dispatcher + .dispatch_many_boxed(boxed_tasks) + .await + .expect("infallible"); handles .iter() @@ -912,12 +1106,18 @@ impl TaskDispatcher for JobTaskDispatcher { .await .expect("remote controllers tx closed"); - handles + Ok(handles) } } +enum DispatchApproval { + Approved, + Canceled, +} + impl JobTaskDispatcher { fn new( + job_id: JobId, dispatcher: BaseTaskDispatcher, running_state_rx: watch::Receiver, ) -> (Self, 
chan::Receiver) { @@ -925,6 +1125,7 @@ impl JobTaskDispatcher { ( Self { + job_id, dispatcher, remote_controllers_tx, running_state: Arc::new(Mutex::new(running_state_rx)), @@ -933,12 +1134,28 @@ impl JobTaskDispatcher { ) } - async fn wait_for_dispatch_approval(&self) { - self.running_state - .lock() - .await - .wait_for(|state| *state == JobRunningState::Running) - .await - .expect("job running state watch channel unexpectedly closed"); + async fn wait_for_dispatch_approval(&self) -> DispatchApproval { + { + let mut running_state_rx = self.running_state.lock().await; + + if running_state_rx + .has_changed() + .expect("job running state watch channel unexpectedly closed") + { + trace!("waiting for job running state to change"); + running_state_rx + .wait_for(|state| { + *state == JobRunningState::Running || *state == JobRunningState::Canceled + }) + .await + .expect("job running state watch channel unexpectedly closed"); + + if matches!(*running_state_rx.borrow(), JobRunningState::Canceled) { + return DispatchApproval::Canceled; + } + } + } + + DispatchApproval::Approved } } diff --git a/core/crates/heavy-lifting/src/job_system/mod.rs b/core/crates/heavy-lifting/src/job_system/mod.rs index ae5fe256a2e4..7080bbd2240b 100644 --- a/core/crates/heavy-lifting/src/job_system/mod.rs +++ b/core/crates/heavy-lifting/src/job_system/mod.rs @@ -16,7 +16,7 @@ use async_channel as chan; use futures::Stream; use futures_concurrency::future::{Join, TryJoin}; use tokio::{fs, spawn, sync::oneshot, task::JoinHandle}; -use tracing::{debug, error, info, trace, warn}; +use tracing::{debug, error, info, instrument, trace, warn}; use uuid::Uuid; mod error; @@ -26,7 +26,7 @@ mod runner; mod store; pub mod utils; -pub use error::JobSystemError; +pub use error::{JobCanceledError, JobSystemError, JobErrorOrJobCanceledError}; use job::{IntoJob, Job, JobName, JobOutput, OuterContext}; use report::Report; use runner::{run, JobSystemRunner, RunnerMessage}; @@ -47,7 +47,7 @@ pub enum Command { pub struct JobSystem> { msgs_tx: chan::Sender>, - job_outputs_rx: chan::Receiver<(JobId, Result)>, + job_outputs_rx: chan::Receiver<(JobId, Result)>, store_jobs_file: Arc, runner_handle: RefCell>>, } @@ -58,7 +58,7 @@ impl> JobSystem, ) -> Self { let (job_outputs_tx, job_outputs_rx) = chan::unbounded(); - let (job_return_status_tx, job_return_status_rx) = chan::bounded(16); + let (job_done_tx, job_done_rx) = chan::bounded(16); let (msgs_tx, msgs_rx) = chan::bounded(8); let store_jobs_file = Arc::new(data_directory.as_ref().join(PENDING_JOBS_FILE)); @@ -70,8 +70,8 @@ impl> JobSystem> JobSystem> JobSystem HashMap { let (ack_tx, ack_rx) = oneshot::channel(); @@ -140,7 +142,9 @@ impl> JobSystem> JobSystem> JobSystem> JobSystem>( &self, @@ -204,7 +212,7 @@ impl> JobSystem> JobSystem bool { let ctx_id = ctx.id(); @@ -236,17 +246,16 @@ impl> JobSystem impl Stream)> { + pub fn receive_job_outputs(&self) -> impl Stream)> { self.job_outputs_rx.clone() } - async fn send_command(&self, id: JobId, command: Command) -> Result<(), JobSystemError> { + #[instrument(skip(self), err)] + async fn send_command(&self, job_id: JobId, command: Command) -> Result<(), JobSystemError> { let (ack_tx, ack_rx) = oneshot::channel(); self.msgs_tx .send(RunnerMessage::Command { - id, + job_id, command, ack_tx, }) @@ -260,16 +269,16 @@ impl> JobSystem Result<(), JobSystemError> { - self.send_command(id, Command::Pause).await + pub async fn pause(&self, job_id: JobId) -> Result<(), JobSystemError> { + self.send_command(job_id, Command::Pause).await } - pub async fn 
resume(&self, id: JobId) -> Result<(), JobSystemError> { - self.send_command(id, Command::Resume).await + pub async fn resume(&self, job_id: JobId) -> Result<(), JobSystemError> { + self.send_command(job_id, Command::Resume).await } - pub async fn cancel(&self, id: JobId) -> Result<(), JobSystemError> { - self.send_command(id, Command::Cancel).await + pub async fn cancel(&self, job_id: JobId) -> Result<(), JobSystemError> { + self.send_command(job_id, Command::Cancel).await } } @@ -338,7 +347,7 @@ async fn load_stored_job_entries Result<(), ReportError> { - let now = Utc::now(); - + pub async fn create( + &mut self, + db: &PrismaClient, + created_at: DateTime, + ) -> Result<(), ReportError> { db.job() .create( self.id.as_bytes().to_vec(), @@ -271,7 +273,7 @@ impl Report { [ job::name::set(Some(self.name.to_string())), job::action::set(self.action.clone()), - job::date_created::set(Some(now.into())), + job::date_created::set(Some(created_at.into())), job::metadata::set(Some(serde_json::to_vec(&self.metadata)?)), job::status::set(Some(self.status as i32)), job::date_started::set(self.started_at.map(Into::into)), @@ -288,7 +290,7 @@ impl Report { .map_err(ReportError::Create)?; // Only setting created_at after we successfully created the job in DB - self.created_at = Some(now); + self.created_at = Some(created_at); Ok(()) } diff --git a/core/crates/heavy-lifting/src/job_system/runner.rs b/core/crates/heavy-lifting/src/job_system/runner.rs index 9b92f9097a1f..5ae2f9bcb2db 100644 --- a/core/crates/heavy-lifting/src/job_system/runner.rs +++ b/core/crates/heavy-lifting/src/job_system/runner.rs @@ -41,14 +41,14 @@ const FIVE_MINUTES: Duration = Duration::from_secs(5 * 60); pub(super) enum RunnerMessage> { NewJob { - id: JobId, + job_id: JobId, location_id: location::id::Type, dyn_job: Box>, ctx: OuterCtx, ack_tx: oneshot::Sender>, }, ResumeStoredJob { - id: JobId, + job_id: JobId, location_id: location::id::Type, dyn_job: Box>, ctx: OuterCtx, @@ -56,14 +56,14 @@ pub(super) enum RunnerMessage>, }, Command { - id: JobId, + job_id: JobId, command: Command, ack_tx: oneshot::Sender>, }, GetActiveReports { ack_tx: oneshot::Sender>, }, - CheckIfJobAreRunning { + CheckIfJobsAreRunning { job_names: Vec, location_id: location::id::Type, ack_tx: oneshot::Sender, @@ -89,14 +89,14 @@ pub(super) struct JobSystemRunner>, worktables: JobsWorktables, job_return_status_tx: chan::Sender<(JobId, Result)>, - job_outputs_tx: chan::Sender<(JobId, Result)>, + job_outputs_tx: chan::Sender<(JobId, Result)>, } impl> JobSystemRunner { pub(super) fn new( base_dispatcher: BaseTaskDispatcher, job_return_status_tx: chan::Sender<(JobId, Result)>, - job_outputs_tx: chan::Sender<(JobId, Result)>, + job_outputs_tx: chan::Sender<(JobId, Result)>, ) -> Self { Self { on_shutdown_mode: false, @@ -116,7 +116,7 @@ impl> JobSystemRunner>, ctx: OuterCtx, @@ -137,25 +137,22 @@ impl> JobSystemRunner> JobSystemRunner>() - .try_join() - .await?; - - handle.ctx.invalidate_query("jobs.isActive"); - handle.ctx.invalidate_query("jobs.reports"); - - handles.insert(id, handle); + handles.insert(job_id, handle); Ok(()) } @@ -226,12 +187,46 @@ impl> JobSystemRunner Result<(), JobSystemError> { - if let Some(handle) = self.handles.get_mut(&id) { - handle.send_command(command).await?; - Ok(()) + async fn process_command( + &mut self, + job_id: JobId, + command: Command, + ack_tx: oneshot::Sender>, + ) { + if let Some(handle) = self.handles.get_mut(&job_id) { + match (command, handle.is_running) { + (Command::Pause, false) => { + warn!("Tried to pause a 
job already paused"); + return ack_tx.send(Ok(())).expect( + "ack channel closed before sending response to already paused job", + ); + } + (Command::Resume, true) => { + warn!("Tried to resume a job already running"); + return ack_tx.send(Ok(())).expect( + "ack channel closed before sending response to already running job", + ); + } + _ => {} + } + match command { + Command::Pause | Command::Cancel => { + handle.is_running = false; + } + Command::Resume => { + handle.is_running = true; + } + } + handle.send_command(command, ack_tx).await; + handle.ctx.invalidate_query("jobs.isActive"); + handle.ctx.invalidate_query("jobs.reports"); } else { - Err(JobSystemError::NotFound(id)) + error!("Job not found"); + ack_tx + .send(Err(JobSystemError::NotFound(job_id))) + .unwrap_or_else(|_| { + panic!("ack channel closed before sending {command:?} response") + }); } } @@ -245,7 +240,7 @@ impl> JobSystemRunner, location_id: location::id::Type, @@ -310,7 +305,7 @@ impl> JobSystemRunner { @@ -369,12 +364,14 @@ impl> JobSystemRunner handle - .cancel_job() + Ok(ReturnStatus::Canceled(job_return)) => { + handle.cancel_job(job_return).await.map_err(Into::into) + } + Err(e) => handle + .failed_job(&e) .await - .and_then(|()| Err(JobSystemError::Canceled(job_id))), - - Err(e) => handle.failed_job(&e).await.and_then(|()| Err(e.into())), + .map_err(Into::into) + .and_then(|()| Err(e)), }; job_outputs_tx @@ -465,7 +462,7 @@ impl> JobSystemRunner bool { self.handles .values() - .any(|handle| handle.ctx.id() == ctx_id) + .any(|handle| handle.ctx.id() == ctx_id && handle.is_running) } } @@ -539,11 +536,13 @@ async fn try_dispatch_next_job>( mut runner: JobSystemRunner, store_jobs_file: impl AsRef + Send, msgs_rx: chan::Receiver>, - job_return_status_rx: chan::Receiver<(JobId, Result)>, + job_done_rx: chan::Receiver<(JobId, Result)>, ) { enum StreamMessage> { ReturnStatus((JobId, Result)), @@ -579,11 +578,11 @@ pub(super) async fn run>( let memory_cleanup_interval = interval_at(Instant::now() + FIVE_MINUTES, FIVE_MINUTES); - let job_return_status_rx_to_shutdown = job_return_status_rx.clone(); + let job_return_status_rx_to_shutdown = job_done_rx.clone(); let mut msg_stream = pin!(( msgs_rx.map(StreamMessage::RunnerMessage), - job_return_status_rx.map(StreamMessage::ReturnStatus), + job_done_rx.map(StreamMessage::ReturnStatus), IntervalStream::new(memory_cleanup_interval).map(|_| StreamMessage::CleanMemoryTick), ) .merge()); @@ -599,14 +598,18 @@ pub(super) async fn run>( // Runner messages StreamMessage::RunnerMessage(RunnerMessage::NewJob { - id, + job_id, location_id, dyn_job, ctx, ack_tx, }) => { ack_tx - .send(runner.new_job(id, location_id, dyn_job, ctx, None).await) + .send( + runner + .new_job(job_id, location_id, dyn_job, ctx, None) + .await, + ) .expect("ack channel closed before sending new job response"); } @@ -622,32 +625,27 @@ pub(super) async fn run>( .expect("ack channel closed before sending active reports response"); } StreamMessage::RunnerMessage(RunnerMessage::ResumeStoredJob { - id, + job_id, location_id, dyn_job, ctx, serialized_tasks, ack_tx, }) => { - let res = runner - .new_job(id, location_id, dyn_job, ctx, serialized_tasks) - .await; ack_tx - .send(res) + .send( + runner + .new_job(job_id, location_id, dyn_job, ctx, serialized_tasks) + .await, + ) .expect("ack channel closed before sending resume job response"); } StreamMessage::RunnerMessage(RunnerMessage::Command { - id, + job_id: id, command, ack_tx, - }) => { - ack_tx - .send(runner.process_command(id, command).await) - 
.unwrap_or_else(|_| { - panic!("ack channel closed before sending {command:?} response") - }); - } + }) => runner.process_command(id, command, ack_tx).await, StreamMessage::RunnerMessage(RunnerMessage::Shutdown) => { runner.on_shutdown_mode = true; @@ -675,13 +673,13 @@ pub(super) async fn run>( return; } - StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobAreRunning { + StreamMessage::RunnerMessage(RunnerMessage::CheckIfJobsAreRunning { job_names, location_id, ack_tx, }) => { ack_tx - .send(runner.check_if_job_are_running(job_names, location_id)) + .send(runner.check_if_jobs_are_running(job_names, location_id)) .expect("ack channel closed before sending resume job response"); } diff --git a/core/crates/heavy-lifting/src/job_system/utils.rs b/core/crates/heavy-lifting/src/job_system/utils.rs index afa8ce56f796..37f5c4788d32 100644 --- a/core/crates/heavy-lifting/src/job_system/utils.rs +++ b/core/crates/heavy-lifting/src/job_system/utils.rs @@ -1,16 +1,33 @@ use crate::Error; -use sd_task_system::TaskHandle; +use sd_task_system::{TaskHandle, TaskStatus}; +use futures::{stream::FuturesUnordered, StreamExt}; use futures_concurrency::future::Join; +use tracing::{error, trace}; -pub async fn cancel_pending_tasks( - pending_tasks: impl IntoIterator> + Send, -) { +pub async fn cancel_pending_tasks(pending_tasks: &mut FuturesUnordered>) { pending_tasks - .into_iter() + .iter() .map(TaskHandle::cancel) .collect::>() .join() .await; + + trace!(total_tasks = %pending_tasks.len(), "canceled all pending tasks, now waiting completion"); + + while let Some(task_result) = pending_tasks.next().await { + match task_result { + Ok(TaskStatus::Done((task_id, _))) => trace!(%task_id, "canceled a completed task"), + + Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion | TaskStatus::Shutdown(_)) => { + trace!("job canceled task"); + // Job canceled task + } + + Ok(TaskStatus::Error(e)) => error!(%e, "job canceled an errored task"), + + Err(e) => error!(%e, "task system failed to cancel a task"), + } + } } diff --git a/core/crates/heavy-lifting/src/lib.rs b/core/crates/heavy-lifting/src/lib.rs index fbdc268e9ed1..c137584e738a 100644 --- a/core/crates/heavy-lifting/src/lib.rs +++ b/core/crates/heavy-lifting/src/lib.rs @@ -63,6 +63,9 @@ pub enum Error { #[error(transparent)] TaskSystem(#[from] TaskSystemError), + + #[error(transparent)] + JobSystem(#[from] JobSystemError), } impl From for rspc::Error { @@ -74,6 +77,7 @@ impl From for rspc::Error { Error::TaskSystem(e) => { Self::with_cause(rspc::ErrorCode::InternalServerError, e.to_string(), e) } + Error::JobSystem(e) => e.into(), } } } diff --git a/core/crates/heavy-lifting/src/media_processor/job.rs b/core/crates/heavy-lifting/src/media_processor/job.rs index 8a7217a04d6c..1769d4f24816 100644 --- a/core/crates/heavy-lifting/src/media_processor/job.rs +++ b/core/crates/heavy-lifting/src/media_processor/job.rs @@ -3,10 +3,10 @@ use crate::{ job::{Job, JobReturn, JobTaskDispatcher, ReturnStatus}, report::ReportOutputMetadata, utils::cancel_pending_tasks, - SerializableJob, SerializedTasks, + JobErrorOrJobCanceledError, SerializableJob, SerializedTasks, }, media_processor::{self, helpers::thumbnailer::THUMBNAIL_CACHE_DIR_NAME}, - utils::sub_path::{self, maybe_get_iso_file_path_from_sub_path}, + utils::sub_path::maybe_get_iso_file_path_from_sub_path, Error, JobContext, JobName, LocationScanState, OuterContext, ProgressUpdate, }; @@ -17,8 +17,8 @@ use sd_core_sync::Manager as SyncManager; use sd_file_ext::extensions::Extension; use sd_prisma::prisma::{location, 
PrismaClient}; use sd_task_system::{ - AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskOutput, - TaskStatus, TaskSystemError, + AnyTaskOutput, IntoTask, SerializableTask, Task, TaskDispatcher, TaskHandle, TaskId, + TaskOutput, TaskStatus, TaskSystemError, }; use sd_utils::{db::maybe_missing, u64_to_frontend}; @@ -114,7 +114,7 @@ impl Job for MediaProcessor { let reporter: Arc = Arc::new(NewThumbnailsReporter { ctx: ctx.clone() }); - self.pending_tasks_on_resume = dispatcher + if let Ok(tasks) = dispatcher .dispatch_many_boxed( rmp_serde::from_slice::)>>(&serialized_tasks) .map_err(media_processor::Error::from)? @@ -145,7 +145,12 @@ impl Job for MediaProcessor { .await .map_err(media_processor::Error::from)?, ) - .await; + .await + { + self.pending_tasks_on_resume = tasks; + } else { + warn!("Failed to dispatch tasks to resume as job was already canceled"); + } Ok(()) } @@ -168,8 +173,18 @@ impl Job for MediaProcessor { ) -> Result { let mut pending_running_tasks = FuturesUnordered::new(); - self.init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) - .await?; + match self + .init_or_resume(&mut pending_running_tasks, &ctx, &dispatcher) + .await + { + Ok(()) => { /* Everything is awesome! */ } + Err(JobErrorOrJobCanceledError::JobError(e)) => { + return Err(e.into()); + } + Err(JobErrorOrJobCanceledError::JobCanceled(_)) => { + return Ok(self.cancel_job(&mut pending_running_tasks).await); + } + } if let Some(res) = self.process_handles(&mut pending_running_tasks, &ctx).await { return res; @@ -240,15 +255,15 @@ impl MediaProcessor { pending_running_tasks: &mut FuturesUnordered>, job_ctx: &impl JobContext, dispatcher: &JobTaskDispatcher, - ) -> Result<(), media_processor::Error> { + ) -> Result<(), JobErrorOrJobCanceledError> { // if we don't have any pending task, then this is a fresh job if self.pending_tasks_on_resume.is_empty() { let location_id = self.location.id; let location_path = &*self.location_path; - let iso_file_path = maybe_get_iso_file_path_from_sub_path( + let iso_file_path = maybe_get_iso_file_path_from_sub_path::( location_id, - &self.sub_path, + self.sub_path.as_ref(), &*self.location_path, job_ctx.db(), ) @@ -256,7 +271,7 @@ impl MediaProcessor { .map_or_else( || { IsolatedFilePathData::new(location_id, location_path, location_path, true) - .map_err(sub_path::Error::from) + .map_err(media_processor::Error::from) }, Ok, )?; @@ -358,21 +373,19 @@ impl MediaProcessor { } Ok(TaskStatus::Error(e)) => { - cancel_pending_tasks(&*pending_running_tasks).await; + cancel_pending_tasks(pending_running_tasks).await; return Some(Err(e)); } Ok(TaskStatus::Canceled | TaskStatus::ForcedAbortion) => { - cancel_pending_tasks(&*pending_running_tasks).await; - - return Some(Ok(ReturnStatus::Canceled)); + return Some(Ok(self.cancel_job(pending_running_tasks).await)); } Err(TaskSystemError::TaskTimeout(task_id)) => { warn!( %task_id, - "Thumbnailer task timed out, we will keep processing the rest of the tasks" + "Thumbnailer task timed out, we will keep processing the rest of the tasks", ); self.errors.push( media_processor::NonCriticalMediaProcessorError::Thumbnailer( @@ -383,8 +396,8 @@ impl MediaProcessor { } Err(e) => { - error!("Task System error: {e:#?}"); - cancel_pending_tasks(&*pending_running_tasks).await; + error!(?e, "Task System error"); + cancel_pending_tasks(pending_running_tasks).await; return Some(Err(e.into())); } @@ -396,7 +409,7 @@ impl MediaProcessor { async fn process_task_output( &mut self, - task_id: uuid::Uuid, + task_id: TaskId, 
any_task_output: Box, job_ctx: &impl JobContext, ) { @@ -512,6 +525,20 @@ impl MediaProcessor { unreachable!("Unexpected task output type: "); } } + + async fn cancel_job( + &mut self, + pending_running_tasks: &mut FuturesUnordered>, + ) -> ReturnStatus { + cancel_pending_tasks(pending_running_tasks).await; + + ReturnStatus::Canceled( + JobReturn::builder() + .with_metadata(mem::take(&mut self.metadata)) + .with_non_critical_errors(mem::take(&mut self.errors)) + .build(), + ) + } } #[derive(Debug, Serialize, Deserialize, Default)] @@ -631,7 +658,7 @@ async fn dispatch_media_data_extractor_tasks( dispatcher: &JobTaskDispatcher, db: &Arc, sync: &Arc, -) -> Result<(u64, Vec>), media_processor::Error> { +) -> Result<(u64, Vec>), JobErrorOrJobCanceledError> { let (extract_exif_file_paths, extract_ffmpeg_file_paths) = ( get_all_children_files_by_extensions( parent_iso_file_path, @@ -689,7 +716,7 @@ async fn dispatch_media_data_extractor_tasks( "Dispatching media data extraction tasks", ); - Ok((files_count, dispatcher.dispatch_many_boxed(tasks).await)) + Ok((files_count, dispatcher.dispatch_many_boxed(tasks).await?)) } async fn get_all_children_files_by_extensions( @@ -750,7 +777,7 @@ async fn dispatch_thumbnailer_tasks( location_path: &PathBuf, dispatcher: &JobTaskDispatcher, ctx: &impl OuterContext, -) -> Result<(u64, Vec>), media_processor::Error> { +) -> Result<(u64, Vec>), JobErrorOrJobCanceledError> { let thumbnails_directory_path = Arc::new(ctx.get_data_directory().join(THUMBNAIL_CACHE_DIR_NAME)); let location_id = parent_iso_file_path.location_id(); @@ -833,7 +860,7 @@ async fn dispatch_thumbnailer_tasks( thumbs_count, dispatcher .dispatch_many_boxed(priority_tasks.into_iter().chain(non_priority_tasks)) - .await, + .await?, )) } diff --git a/core/crates/heavy-lifting/src/media_processor/shallow.rs b/core/crates/heavy-lifting/src/media_processor/shallow.rs index 6f96db0c9602..f4e840b355e4 100644 --- a/core/crates/heavy-lifting/src/media_processor/shallow.rs +++ b/core/crates/heavy-lifting/src/media_processor/shallow.rs @@ -49,14 +49,13 @@ pub async fn shallow( let location = Arc::new(location); - let sub_iso_file_path = maybe_get_iso_file_path_from_sub_path( + let sub_iso_file_path = maybe_get_iso_file_path_from_sub_path::( location.id, - &Some(sub_path), + Some(sub_path), &*location_path, ctx.db(), ) - .await - .map_err(media_processor::Error::from)? + .await? 
.map_or_else( || { IsolatedFilePathData::new(location.id, &*location_path, &*location_path, true) @@ -159,7 +158,7 @@ async fn dispatch_media_data_extractor_tasks( parent_iso_file_path: &IsolatedFilePathData<'_>, location_path: &Arc, dispatcher: &BaseTaskDispatcher, -) -> Result>, media_processor::Error> { +) -> Result>, Error> { let (extract_exif_file_paths, extract_ffmpeg_file_paths) = ( get_direct_children_files_by_extensions( parent_iso_file_path, @@ -209,7 +208,10 @@ async fn dispatch_media_data_extractor_tasks( ) .collect::>(); - Ok(dispatcher.dispatch_many_boxed(tasks).await) + Ok(dispatcher + .dispatch_many_boxed(tasks) + .await + .expect("infallible")) } async fn dispatch_thumbnailer_tasks( @@ -218,7 +220,7 @@ async fn dispatch_thumbnailer_tasks( location_path: &PathBuf, dispatcher: &BaseTaskDispatcher, ctx: &impl OuterContext, -) -> Result>, media_processor::Error> { +) -> Result>, Error> { let thumbnails_directory_path = Arc::new(ctx.get_data_directory().join(THUMBNAIL_CACHE_DIR_NAME)); let location_id = parent_iso_file_path.location_id(); @@ -259,5 +261,8 @@ async fn dispatch_thumbnailer_tasks( tasks.len(), ); - Ok(dispatcher.dispatch_many_boxed(tasks).await) + Ok(dispatcher + .dispatch_many_boxed(tasks) + .await + .expect("infallible")) } diff --git a/core/crates/heavy-lifting/src/utils/sub_path.rs b/core/crates/heavy-lifting/src/utils/sub_path.rs index 91e3b88c7f39..5f78c942b3a1 100644 --- a/core/crates/heavy-lifting/src/utils/sub_path.rs +++ b/core/crates/heavy-lifting/src/utils/sub_path.rs @@ -34,55 +34,82 @@ impl From for rspc::Error { } } -pub async fn get_full_path_from_sub_path( +pub async fn get_full_path_from_sub_path>( location_id: location::id::Type, - sub_path: &Option + Send + Sync>, + sub_path: Option + Send + Sync>, location_path: impl AsRef + Send, db: &PrismaClient, -) -> Result { - let location_path = location_path.as_ref(); - - match sub_path { - Some(sub_path) if sub_path.as_ref() != Path::new("") => { - let sub_path = sub_path.as_ref(); - let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?; - - ensure_sub_path_is_directory(location_path, sub_path).await?; - - ensure_file_path_exists( - sub_path, - &IsolatedFilePathData::new(location_id, location_path, &full_path, true)?, - db, - Error::SubPathNotFound, - ) - .await?; - - Ok(full_path) +) -> Result { + async fn inner( + location_id: location::id::Type, + sub_path: Option<&Path>, + location_path: &Path, + db: &PrismaClient, + ) -> Result { + match sub_path { + Some(sub_path) if sub_path != Path::new("") => { + let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?; + + ensure_sub_path_is_directory(location_path, sub_path).await?; + + ensure_file_path_exists( + sub_path, + &IsolatedFilePathData::new(location_id, location_path, &full_path, true)?, + db, + Error::SubPathNotFound, + ) + .await?; + + Ok(full_path) + } + _ => Ok(location_path.to_path_buf()), } - _ => Ok(location_path.to_path_buf()), } + + inner( + location_id, + sub_path.as_ref().map(AsRef::as_ref), + location_path.as_ref(), + db, + ) + .await + .map_err(E::from) } -pub async fn maybe_get_iso_file_path_from_sub_path( +pub async fn maybe_get_iso_file_path_from_sub_path>( location_id: location::id::Type, - sub_path: &Option + Send + Sync>, + sub_path: Option + Send + Sync>, location_path: impl AsRef + Send, db: &PrismaClient, -) -> Result>, Error> { - let location_path = location_path.as_ref(); - - match sub_path { - Some(sub_path) if sub_path.as_ref() != Path::new("") => { - let full_path = 
ensure_sub_path_is_in_location(location_path, sub_path).await?; - ensure_sub_path_is_directory(location_path, sub_path).await?; - - let sub_iso_file_path = - IsolatedFilePathData::new(location_id, location_path, &full_path, true)?; - - ensure_file_path_exists(sub_path, &sub_iso_file_path, db, Error::SubPathNotFound) - .await - .map(|()| Some(sub_iso_file_path)) +) -> Result>, E> { + async fn inner( + location_id: location::id::Type, + sub_path: Option<&Path>, + location_path: &Path, + db: &PrismaClient, + ) -> Result>, Error> { + match sub_path { + Some(sub_path) if sub_path != Path::new("") => { + let full_path = ensure_sub_path_is_in_location(location_path, sub_path).await?; + ensure_sub_path_is_directory(location_path, sub_path).await?; + + let sub_iso_file_path = + IsolatedFilePathData::new(location_id, location_path, &full_path, true)?; + + ensure_file_path_exists(sub_path, &sub_iso_file_path, db, Error::SubPathNotFound) + .await + .map(|()| Some(sub_iso_file_path)) + } + _ => Ok(None), } - _ => Ok(None), } + + inner( + location_id, + sub_path.as_ref().map(AsRef::as_ref), + location_path.as_ref(), + db, + ) + .await + .map_err(E::from) } diff --git a/core/src/api/jobs.rs b/core/src/api/jobs.rs index 3300809c69cd..12a95b6ae701 100644 --- a/core/src/api/jobs.rs +++ b/core/src/api/jobs.rs @@ -81,7 +81,8 @@ pub(crate) fn mount() -> AlphaRouter { // - TODO: refactor grouping system to a many-to-many table #[derive(Debug, Clone, Serialize, Type)] pub struct JobGroup { - id: Uuid, + id: JobId, + running_job_id: Option, action: Option, status: report::Status, created_at: DateTime, @@ -136,6 +137,9 @@ pub(crate) fn mount() -> AlphaRouter { Entry::Vacant(entry) => { entry.insert(JobGroup { id: job.parent_id.unwrap_or(job.id), + running_job_id: (job.status == report::Status::Running + || job.status == report::Status::Paused) + .then_some(job.id), action: Some(action_name), status: job.status, jobs: [report.clone()].into_iter().collect(), @@ -146,8 +150,10 @@ pub(crate) fn mount() -> AlphaRouter { Entry::Occupied(mut entry) => { let group = entry.get_mut(); - // protect paused status from being overwritten - if report.status != report::Status::Paused { + if report.status == report::Status::Running + || report.status == report::Status::Paused + { + group.running_job_id = Some(report.id); group.status = report.status; } @@ -160,6 +166,7 @@ pub(crate) fn mount() -> AlphaRouter { job.id.to_string(), JobGroup { id: job.id, + running_job_id: Some(job.id), action: None, status: job.status, jobs: [report.clone()].into_iter().collect(), @@ -225,16 +232,17 @@ pub(crate) fn mount() -> AlphaRouter { // pause job .procedure("pause", { R.with2(library()) - .mutation(|(node, library), id: JobId| async move { - if let Err(e) = node.job_system.pause(id).await { + .mutation(|(node, library), job_id: JobId| async move { + if let Err(e) = node.job_system.pause(job_id).await { if matches!(e, JobSystemError::NotFound(_)) { // If the job is not found, it can be a job from the old job system - node.old_jobs.pause(id).await?; + node.old_jobs.pause(job_id).await?; } else { return Err(e.into()); } } + invalidate_query!(library, "jobs.isActive"); invalidate_query!(library, "jobs.reports"); Ok(()) @@ -242,16 +250,17 @@ pub(crate) fn mount() -> AlphaRouter { }) .procedure("resume", { R.with2(library()) - .mutation(|(node, library), id: Uuid| async move { - if let Err(e) = node.job_system.resume(id).await { + .mutation(|(node, library), job_id: JobId| async move { + if let Err(e) = node.job_system.resume(job_id).await { if 
matches!(e, JobSystemError::NotFound(_)) { // If the job is not found, it can be a job from the old job system - node.old_jobs.resume(id).await?; + node.old_jobs.resume(job_id).await?; } else { return Err(e.into()); } } + invalidate_query!(library, "jobs.isActive"); invalidate_query!(library, "jobs.reports"); Ok(()) @@ -259,16 +268,17 @@ pub(crate) fn mount() -> AlphaRouter { }) .procedure("cancel", { R.with2(library()) - .mutation(|(node, library), id: Uuid| async move { - if let Err(e) = node.job_system.cancel(id).await { + .mutation(|(node, library), job_id: JobId| async move { + if let Err(e) = node.job_system.cancel(job_id).await { if matches!(e, JobSystemError::NotFound(_)) { // If the job is not found, it can be a job from the old job system - node.old_jobs.cancel(id).await?; + node.old_jobs.cancel(job_id).await?; } else { return Err(e.into()); } } + invalidate_query!(library, "jobs.isActive"); invalidate_query!(library, "jobs.reports"); Ok(()) @@ -373,14 +383,6 @@ pub(crate) fn mount() -> AlphaRouter { return Err(LocationError::IdNotFound(id).into()); }; - // OldJob::new(OldFileIdentifierJobInit { - // location, - // sub_path: Some(args.path), - // }) - // .spawn(&node, &library) - // .await - // .map_err(Into::into) - node.job_system .dispatch( FileIdentifier::new(location, Some(path))?, diff --git a/core/src/context.rs b/core/src/context.rs index c308d3c5cf04..772883a1b9fe 100644 --- a/core/src/context.rs +++ b/core/src/context.rs @@ -129,7 +129,7 @@ impl sd_core_heavy_lifting::JobContext< } } - async fn progress(&self, updates: Vec) { + async fn progress(&self, updates: impl IntoIterator + Send) { let mut report = self.report.write().await; // protect against updates if job is not running @@ -137,6 +137,8 @@ impl sd_core_heavy_lifting::JobContext< return; }; + let mut changed_phase = false; + for update in updates { match update { ProgressUpdate::TaskCount(task_count) => { @@ -157,6 +159,7 @@ impl sd_core_heavy_lifting::JobContext< report.phase ); report.phase = phase; + changed_phase = true; } } } @@ -183,7 +186,7 @@ impl sd_core_heavy_lifting::JobContext< let counter = self.report_update_counter.fetch_add(1, Ordering::AcqRel); - if counter == 50 || counter == 0 { + if counter == 50 || counter == 0 || changed_phase { self.report_update_counter.store(1, Ordering::Release); spawn({ diff --git a/core/src/lib.rs b/core/src/lib.rs index 68910b61d613..7dd9d89c869c 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -243,10 +243,10 @@ impl Node { format!( "info,\ sd_core={level},\ - sd_p2p=debug,\ + sd_p2p={level},\ sd_core::location::manager=info,\ - sd_core_heavy_lifting=debug,\ - sd_task_system=debug,\ + sd_core_heavy_lifting={level},\ + sd_task_system={level},\ sd_ai={level}" ), ); diff --git a/core/src/location/manager/mod.rs b/core/src/location/manager/mod.rs index 787ef6519c56..00a234c23ca1 100644 --- a/core/src/location/manager/mod.rs +++ b/core/src/location/manager/mod.rs @@ -6,7 +6,6 @@ use crate::{ use sd_core_file_path_helper::FilePathError; -use sd_core_heavy_lifting::{indexer, JobSystemError}; use sd_prisma::prisma::location; use sd_utils::{db::MissingFieldError, error::FileIOError}; @@ -26,7 +25,6 @@ use tracing::{debug, error}; use uuid::Uuid; mod watcher; - mod helpers; #[derive(Clone, Copy, Debug)] @@ -105,9 +103,7 @@ pub enum LocationManagerError { MissingField(#[from] MissingFieldError), #[error(transparent)] - Indexer(#[from] indexer::Error), - #[error(transparent)] - JobSystem(#[from] JobSystemError), + JobSystem(#[from] sd_core_heavy_lifting::Error), 
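+	// The heavy-lifting crate's top-level `Error` already wraps both indexer and
+	// job-system failures, so the previously separate `Indexer` and `JobSystem`
+	// variants are collapsed into the single variant above.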
#[error(transparent)] FileIO(#[from] FileIOError), } diff --git a/core/src/location/manager/watcher/utils.rs b/core/src/location/manager/watcher/utils.rs index 50b2380a37ff..080c21840429 100644 --- a/core/src/location/manager/watcher/utils.rs +++ b/core/src/location/manager/watcher/utils.rs @@ -1120,7 +1120,8 @@ pub(super) async fn recalculate_directories_size( &library.sync, &mut non_critical_errors, ) - .await?; + .await + .map_err(sd_core_heavy_lifting::Error::from)?; if !non_critical_errors.is_empty() { error!( diff --git a/core/src/location/mod.rs b/core/src/location/mod.rs index 213f0026b979..8a7df07d442e 100644 --- a/core/src/location/mod.rs +++ b/core/src/location/mod.rs @@ -1,9 +1,4 @@ -use crate::{ - context::NodeContext, - invalidate_query, - library::Library, - Node, -}; +use crate::{context::NodeContext, invalidate_query, library::Library, Node}; use sd_core_file_path_helper::{ filter_existing_file_path_params, IsolatedFilePathData, IsolatedFilePathDataParts, @@ -13,7 +8,7 @@ use sd_core_heavy_lifting::{ indexer::{self, job::Indexer}, job_system::report::ReportInputMetadata, media_processor::{self, job::MediaProcessor}, - JobEnqueuer, JobId, JobSystemError, + JobEnqueuer, JobId, }; use sd_core_prisma_helpers::{location_with_indexer_rules, CasId}; @@ -466,7 +461,7 @@ pub async fn scan_location( library: &Arc, location: location_with_indexer_rules::Data, location_scan_state: ScanState, -) -> Result, JobSystemError> { +) -> Result, sd_core_heavy_lifting::Error> { // TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup. if location.instance_id != Some(library.config().await.instance_id) { return Ok(None); @@ -486,19 +481,11 @@ pub async fn scan_location( ScanState::Pending | ScanState::Completed => { node.job_system .dispatch( - JobEnqueuer::new( - Indexer::new(location, None).map_err(sd_core_heavy_lifting::Error::from)?, - ) - .with_action("scan_location") - .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) - .enqueue_next( - FileIdentifier::new(location_base_data.clone(), None) - .map_err(sd_core_heavy_lifting::Error::from)?, - ) - .enqueue_next( - MediaProcessor::new(location_base_data, None, false) - .map_err(sd_core_heavy_lifting::Error::from)?, - ), + JobEnqueuer::new(Indexer::new(location, None)?) + .with_action("scan_location") + .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) + .enqueue_next(FileIdentifier::new(location_base_data.clone(), None)?) + .enqueue_next(MediaProcessor::new(location_base_data, None, false)?), location_id, ctx.clone(), ) @@ -507,16 +494,10 @@ pub async fn scan_location( ScanState::Indexed => { node.job_system .dispatch( - JobEnqueuer::new( - FileIdentifier::new(location_base_data.clone(), None) - .map_err(sd_core_heavy_lifting::Error::from)?, - ) - .with_action("scan_location_already_indexed") - .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) - .enqueue_next( - MediaProcessor::new(location_base_data, None, false) - .map_err(sd_core_heavy_lifting::Error::from)?, - ), + JobEnqueuer::new(FileIdentifier::new(location_base_data.clone(), None)?) 
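+						// The location was already indexed, so only file identification and
+						// media processing are enqueued for this scan.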
+ .with_action("scan_location_already_indexed") + .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) + .enqueue_next(MediaProcessor::new(location_base_data, None, false)?), location_id, ctx.clone(), ) @@ -525,10 +506,11 @@ pub async fn scan_location( ScanState::FilesIdentified => { node.job_system .dispatch( - JobEnqueuer::new( - MediaProcessor::new(location_base_data.clone(), None, false) - .map_err(sd_core_heavy_lifting::Error::from)?, - ) + JobEnqueuer::new(MediaProcessor::new( + location_base_data.clone(), + None, + false, + )?) .with_action("scan_location_files_already_identified") .with_metadata(ReportInputMetadata::Location(location_base_data)), location_id, @@ -546,7 +528,7 @@ pub async fn scan_location_sub_path( library: &Arc, location: location_with_indexer_rules::Data, sub_path: impl AsRef, -) -> Result, JobSystemError> { +) -> Result, sd_core_heavy_lifting::Error> { let sub_path = sub_path.as_ref().to_path_buf(); // TODO(N): This isn't gonna work with removable media and this will likely permanently break if the DB is restored from a backup. @@ -564,25 +546,24 @@ pub async fn scan_location_sub_path( node.job_system .dispatch( - JobEnqueuer::new( - Indexer::new(location, Some(sub_path.clone())) - .map_err(sd_core_heavy_lifting::Error::from)?, - ) - .with_action("scan_location") - .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) - .with_metadata(ReportInputMetadata::SubPath(sub_path.clone())) - .enqueue_next( - FileIdentifier::new(location_base_data.clone(), Some(sub_path.clone())) - .map_err(sd_core_heavy_lifting::Error::from)?, - ) - .enqueue_next( - MediaProcessor::new(location_base_data, Some(sub_path), false) - .map_err(sd_core_heavy_lifting::Error::from)?, - ), + JobEnqueuer::new(Indexer::new(location, Some(sub_path.clone()))?) + .with_action("scan_location") + .with_metadata(ReportInputMetadata::Location(location_base_data.clone())) + .with_metadata(ReportInputMetadata::SubPath(sub_path.clone())) + .enqueue_next(FileIdentifier::new( + location_base_data.clone(), + Some(sub_path.clone()), + )?) 
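+					// Media processing for the same sub path is chained to run right after
+					// file identification completes.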
+ .enqueue_next(MediaProcessor::new( + location_base_data, + Some(sub_path), + false, + )?), location_id, ctx.clone(), ) .await + .map_err(Into::into) .map(Some) } diff --git a/core/src/util/debug_initializer.rs b/core/src/util/debug_initializer.rs index 0427049f1705..ebea51ebd222 100644 --- a/core/src/util/debug_initializer.rs +++ b/core/src/util/debug_initializer.rs @@ -11,7 +11,6 @@ use crate::{ Node, }; -use sd_core_heavy_lifting::JobSystemError; use sd_prisma::prisma::location; use sd_utils::error::FileIOError; @@ -77,7 +76,7 @@ pub enum InitConfigError { CurrentDir(io::Error), #[error(transparent)] - JobSystem(#[from] JobSystemError), + Processing(#[from] sd_core_heavy_lifting::Error), #[error(transparent)] FileIO(#[from] FileIOError), } diff --git a/crates/task-system/Cargo.toml b/crates/task-system/Cargo.toml index bb488e1943f9..1e97bdcd4f64 100644 --- a/crates/task-system/Cargo.toml +++ b/crates/task-system/Cargo.toml @@ -39,3 +39,4 @@ thiserror = { workspace = true } tokio = { workspace = true, features = ["macros", "test-util", "fs"] } tracing-test = { workspace = true, features = ["no-env-filter"] } uuid = { workspace = true, features = ["serde"] } +tracing-subscriber = { workspace = true, features = ["env-filter"] } diff --git a/crates/task-system/src/message.rs b/crates/task-system/src/message.rs index ddb67e57f1a3..723506e3fbb9 100644 --- a/crates/task-system/src/message.rs +++ b/crates/task-system/src/message.rs @@ -1,9 +1,11 @@ +use std::sync::Arc; + use async_channel as chan; use tokio::sync::oneshot; use super::{ error::{RunError, SystemError}, - task::{InternalTaskExecStatus, TaskId, TaskWorkState}, + task::{InternalTaskExecStatus, TaskId, TaskWorkState, TaskWorktable}, worker::WorkerId, }; @@ -13,22 +15,22 @@ pub enum SystemMessage { WorkingReport(WorkerId), ResumeTask { task_id: TaskId, - worker_id: WorkerId, + task_work_table: Arc, ack: oneshot::Sender>, }, PauseNotRunningTask { task_id: TaskId, - worker_id: WorkerId, + task_work_table: Arc, ack: oneshot::Sender>, }, CancelNotRunningTask { task_id: TaskId, - worker_id: WorkerId, - ack: oneshot::Sender<()>, + task_work_table: Arc, + ack: oneshot::Sender>, }, ForceAbortion { task_id: TaskId, - worker_id: WorkerId, + task_work_table: Arc, ack: oneshot::Sender>, }, ShutdownRequest(oneshot::Sender>), @@ -46,7 +48,7 @@ pub enum WorkerMessage { }, CancelNotRunningTask { task_id: TaskId, - ack: oneshot::Sender<()>, + ack: oneshot::Sender>, }, ForceAbortion { task_id: TaskId, @@ -54,6 +56,7 @@ pub enum WorkerMessage { }, ShutdownRequest(oneshot::Sender<()>), StealRequest { + stealer_id: WorkerId, ack: oneshot::Sender, stolen_task_tx: chan::Sender>>, }, diff --git a/crates/task-system/src/system.rs b/crates/task-system/src/system.rs index 0d78f9902ede..fff89ec5a111 100644 --- a/crates/task-system/src/system.rs +++ b/crates/task-system/src/system.rs @@ -1,6 +1,7 @@ use std::{ cell::RefCell, collections::HashSet, + convert::Infallible, fmt, future::Future, num::NonZeroUsize, @@ -15,13 +16,13 @@ use async_channel as chan; use futures::StreamExt; use futures_concurrency::future::Join; use tokio::{spawn, sync::oneshot, task::JoinHandle}; -use tracing::{error, info, instrument, trace, warn}; +use tracing::{error, info, instrument, trace, warn, Instrument}; use super::{ error::{RunError, SystemError}, message::SystemMessage, - task::{IntoTask, Task, TaskHandle, TaskId}, - worker::{AtomicWorkerId, WorkStealer, Worker, WorkerBuilder, WorkerId}, + task::{IntoTask, Task, TaskHandle, TaskId, TaskWorktable}, + worker::{AtomicWorkerId, 
WorkStealer, Worker, WorkerBuilder}, }; /// The task system is the main entry point for the library, it is responsible for creating and managing the workers @@ -116,11 +117,16 @@ impl System { } /// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible. + #[allow(clippy::missing_panics_doc)] // SAFETY: doesn't panic pub async fn dispatch(&self, into_task: impl IntoTask) -> TaskHandle { - self.dispatcher.dispatch(into_task).await + self.dispatcher + .dispatch(into_task) + .await + .expect("infallible") } /// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible. + #[allow(clippy::missing_panics_doc)] // SAFETY: doesn't panic pub async fn dispatch_many> + Send>( &self, into_tasks: I, @@ -128,7 +134,10 @@ impl System { where ::IntoIter: Send, { - self.dispatcher.dispatch_many(into_tasks).await + self.dispatcher + .dispatch_many(into_tasks) + .await + .expect("infallible") } /// Returns a dispatcher that can be used to remotely dispatch tasks to the system. @@ -146,41 +155,50 @@ impl System { while let Some(msg) = msg_stream.next().await { match msg { SystemMessage::IdleReport(worker_id) => { - trace!(%worker_id, "Task system received a worker idle report request"); idle_workers[worker_id].store(true, Ordering::Relaxed); } SystemMessage::WorkingReport(worker_id) => { - trace!(%worker_id, "Task system received a working report request"); idle_workers[worker_id].store(false, Ordering::Relaxed); } SystemMessage::ResumeTask { task_id, - worker_id, + task_work_table, ack, - } => dispatch_resume_request(&workers, task_id, worker_id, ack), + } => dispatch_resume_request(&workers, task_id, task_work_table, ack), SystemMessage::PauseNotRunningTask { task_id, - worker_id, + task_work_table, ack, - } => dispatch_pause_not_running_task_request(&workers, task_id, worker_id, ack), + } => { + dispatch_pause_not_running_task_request( + &workers, + task_id, + task_work_table, + ack, + ); + } SystemMessage::CancelNotRunningTask { task_id, - worker_id, + task_work_table, + ack, + } => dispatch_cancel_not_running_task_request( + &workers, + task_id, + task_work_table, ack, - } => dispatch_cancel_not_running_task_request(&workers, task_id, worker_id, ack), + ), SystemMessage::ForceAbortion { task_id, - worker_id, + task_work_table, ack, - } => dispatch_force_abortion_task_request(&workers, task_id, worker_id, ack), + } => dispatch_force_abortion_task_request(&workers, task_id, task_work_table, ack), SystemMessage::ShutdownRequest(tx) => { - trace!("Task system received a shutdown request"); tx.send(Ok(())) .expect("System channel closed trying to shutdown"); return; @@ -236,54 +254,125 @@ impl System { fn dispatch_resume_request( workers: &Arc>>, task_id: TaskId, - worker_id: WorkerId, + task_work_table: Arc, ack: oneshot::Sender>, ) { trace!("Task system received a task resume request"); - spawn({ - let workers = Arc::clone(workers); - async move { - workers[worker_id].resume_task(task_id, ack).await; + spawn( + { + let workers = Arc::clone(workers); + async move { + let (tx, rx) = oneshot::channel(); + let first_attempt_worker_id = task_work_table.worker_id(); + workers[first_attempt_worker_id] + .resume_task(task_id, tx) + .await; + let res = rx + .await + .expect("Task system channel closed trying to resume not running task"); + + if matches!(res, Err(SystemError::TaskNotFound(_))) { + warn!( + %first_attempt_worker_id, + "Failed the first try to resume a not running task, trying again", + ); + 
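+						// `TaskNotFound` usually means the task was stolen by another worker
+						// after the first lookup, so re-read the current worker id from the
+						// worktable and retry once there.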
workers[task_work_table.worker_id()] + .resume_task(task_id, ack) + .await; + } else { + ack.send(res) + .expect("System channel closed trying to resume not running task"); + } + } } - }); + .in_current_span(), + ); trace!("Task system resumed task"); } -#[instrument(skip(workers, ack))] +#[instrument(skip(workers, ack, task_work_table))] fn dispatch_pause_not_running_task_request( workers: &Arc>>, task_id: TaskId, - worker_id: WorkerId, + task_work_table: Arc, ack: oneshot::Sender>, ) { - trace!("Task system received a task pause request"); - spawn({ - let workers = Arc::clone(workers); - async move { - workers[worker_id] - .pause_not_running_task(task_id, ack) - .await; + spawn( + { + let workers: Arc>> = Arc::clone(workers); + + async move { + let (tx, rx) = oneshot::channel(); + let first_attempt_worker_id = task_work_table.worker_id(); + workers[first_attempt_worker_id] + .pause_not_running_task(task_id, tx) + .await; + let res = rx + .await + .expect("Task system channel closed trying to pause not running task"); + + if matches!(res, Err(SystemError::TaskNotFound(_))) { + warn!( + %first_attempt_worker_id, + "Failed the first try to pause a not running task, trying again", + ); + workers[task_work_table.worker_id()] + .pause_not_running_task(task_id, ack) + .await; + } else { + ack.send(res) + .expect("System channel closed trying to pause not running task"); + } + } } - }); - trace!("Task system paused task"); + .in_current_span(), + ); } #[instrument(skip(workers, ack))] fn dispatch_cancel_not_running_task_request( workers: &Arc>>, task_id: TaskId, - worker_id: WorkerId, - ack: oneshot::Sender<()>, + task_work_table: Arc, + ack: oneshot::Sender>, ) { trace!("Task system received a task cancel request"); - spawn({ - let workers = Arc::clone(workers); - async move { - workers[worker_id] - .cancel_not_running_task(task_id, ack) - .await; + spawn( + { + let workers = Arc::clone(workers); + async move { + let (tx, rx) = oneshot::channel(); + let first_attempt_worker_id = task_work_table.worker_id(); + workers[first_attempt_worker_id] + .cancel_not_running_task(task_id, tx) + .await; + let res = rx + .await + .expect("Task system channel closed trying to cancel a not running task"); + + if matches!(res, Err(SystemError::TaskNotFound(_))) { + if task_work_table.is_finalized() { + return ack + .send(Ok(())) + .expect("System channel closed trying to cancel a not running task"); + } + + warn!( + %first_attempt_worker_id, + "Failed the first try to cancel a not running task, trying again", + ); + workers[task_work_table.worker_id()] + .cancel_not_running_task(task_id, ack) + .await; + } else { + ack.send(res) + .expect("System channel closed trying to cancel not running task"); + } + } } - }); + .in_current_span(), + ); + trace!("Task system canceled task"); } @@ -291,16 +380,40 @@ fn dispatch_cancel_not_running_task_request( fn dispatch_force_abortion_task_request( workers: &Arc>>, task_id: TaskId, - worker_id: WorkerId, + task_work_table: Arc, ack: oneshot::Sender>, ) { trace!("Task system received a task force abortion request"); - spawn({ - let workers = Arc::clone(workers); - async move { - workers[worker_id].force_task_abortion(task_id, ack).await; + spawn( + { + let workers = Arc::clone(workers); + async move { + let (tx, rx) = oneshot::channel(); + let first_attempt_worker_id = task_work_table.worker_id(); + workers[first_attempt_worker_id] + .force_task_abortion(task_id, tx) + .await; + let res = rx.await.expect( + "Task system channel closed trying to force abortion of a not 
running task", + ); + + if matches!(res, Err(SystemError::TaskNotFound(_))) { + warn!( + %first_attempt_worker_id, + "Failed the first try to force abortion of a not running task, trying again", + ); + workers[task_work_table.worker_id()] + .force_task_abortion(task_id, ack) + .await; + } else { + ack.send(res).expect( + "System channel closed trying to force abortion of a not running task", + ); + } + } } - }); + .in_current_span(), + ); trace!("Task system aborted task"); } @@ -323,101 +436,116 @@ pub struct SystemComm(chan::Sender); impl SystemComm { pub fn idle_report(&self, worker_id: usize) { let system_tx = self.0.clone(); - spawn(async move { - system_tx - .send(SystemMessage::IdleReport(worker_id)) - .await - .expect("System channel closed trying to report idle"); - }); + spawn( + async move { + system_tx + .send(SystemMessage::IdleReport(worker_id)) + .await + .expect("System channel closed trying to report idle"); + } + .in_current_span(), + ); } pub fn working_report(&self, worker_id: usize) { let system_tx = self.0.clone(); - spawn(async move { - system_tx - .send(SystemMessage::WorkingReport(worker_id)) - .await - .expect("System channel closed trying to report working"); - }); + spawn( + async move { + system_tx + .send(SystemMessage::WorkingReport(worker_id)) + .await + .expect("System channel closed trying to report working"); + } + .in_current_span(), + ); } pub fn pause_not_running_task( &self, task_id: TaskId, - worker_id: WorkerId, - res_tx: oneshot::Sender>, + task_work_table: Arc, + ack: oneshot::Sender>, ) { let system_tx = self.0.clone(); - spawn(async move { - system_tx - .send(SystemMessage::PauseNotRunningTask { - task_id, - worker_id, - ack: res_tx, - }) - .await - .expect("System channel closed trying to pause not running task"); - }); + spawn( + async move { + system_tx + .send(SystemMessage::PauseNotRunningTask { + task_id, + task_work_table, + ack, + }) + .await + .expect("System channel closed trying to pause not running task"); + } + .in_current_span(), + ); } pub fn cancel_not_running_task( &self, task_id: TaskId, - worker_id: WorkerId, - res_tx: oneshot::Sender<()>, + task_work_table: Arc, + ack: oneshot::Sender>, ) { let system_tx = self.0.clone(); - - spawn(async move { - system_tx - .send(SystemMessage::CancelNotRunningTask { - task_id, - worker_id, - ack: res_tx, - }) - .await - .expect("System channel closed trying to cancel a not running task"); - }); + spawn( + async move { + system_tx + .send(SystemMessage::CancelNotRunningTask { + task_id, + task_work_table, + ack, + }) + .await + .expect("System channel closed trying to cancel a not running task"); + } + .in_current_span(), + ); } pub fn resume_task( &self, task_id: TaskId, - worker_id: WorkerId, - res_tx: oneshot::Sender>, + task_work_table: Arc, + ack: oneshot::Sender>, ) { let system_tx = self.0.clone(); - - spawn(async move { - system_tx - .send(SystemMessage::ResumeTask { - task_id, - worker_id, - ack: res_tx, - }) - .await - .expect("System channel closed trying to resume task"); - }); + spawn( + async move { + system_tx + .send(SystemMessage::ResumeTask { + task_id, + task_work_table, + ack, + }) + .await + .expect("System channel closed trying to resume task"); + } + .in_current_span(), + ); } pub fn force_abortion( &self, task_id: TaskId, - worker_id: WorkerId, - res_tx: oneshot::Sender>, + task_work_table: Arc, + ack: oneshot::Sender>, ) { let system_tx = self.0.clone(); - - spawn(async move { - system_tx - .send(SystemMessage::ForceAbortion { - task_id, - worker_id, - ack: res_tx, - 
}) - .await - .expect("System channel closed trying to resume task"); - }); + spawn( + async move { + system_tx + .send(SystemMessage::ForceAbortion { + task_id, + task_work_table, + ack, + }) + .await + .expect("System channel closed trying to resume task"); + } + .in_current_span(), + ); } } @@ -432,9 +560,21 @@ pub struct BaseDispatcher { last_worker_id: Arc, } +/// A trait that represents a dispatcher that can be used to dispatch tasks to the system. +/// It can be used to dispatch tasks to the system from other threads or tasks. +/// +/// The `E: RunError` error parameter is the error type that the dispatcher can return. +/// Although the [`BaseDispatcher`] which is the default implementation of this trait, will always returns +/// a [`Result`] with the [`TaskHandle`] in the [`Ok`] variant, it can be used to implement a custom +/// fallible dispatcher that returns an [`Err`] variant with a custom error type. pub trait Dispatcher: fmt::Debug + Clone + Send + Sync + 'static { + type DispatchError: RunError; + /// Dispatches a task to the system, the task will be assigned to a worker and executed as soon as possible. - fn dispatch(&self, into_task: impl IntoTask) -> impl Future> + Send { + fn dispatch( + &self, + into_task: impl IntoTask, + ) -> impl Future, Self::DispatchError>> + Send { self.dispatch_boxed(into_task.into_task()) } @@ -443,13 +583,13 @@ pub trait Dispatcher: fmt::Debug + Clone + Send + Sync + 'static { fn dispatch_boxed( &self, boxed_task: Box>, - ) -> impl Future> + Send; + ) -> impl Future, Self::DispatchError>> + Send; /// Dispatches many tasks to the system, the tasks will be assigned to workers and executed as soon as possible. fn dispatch_many> + Send>( &self, into_tasks: I, - ) -> impl Future>> + Send + ) -> impl Future>, Self::DispatchError>> + Send where I::IntoIter: Send, { @@ -461,7 +601,7 @@ pub trait Dispatcher: fmt::Debug + Clone + Send + Sync + 'static { fn dispatch_many_boxed( &self, boxed_tasks: impl IntoIterator>> + Send, - ) -> impl Future>> + Send; + ) -> impl Future>, Self::DispatchError>> + Send; } impl Clone for BaseDispatcher { @@ -475,12 +615,14 @@ impl Clone for BaseDispatcher { } impl Dispatcher for BaseDispatcher { - async fn dispatch(&self, into_task: impl IntoTask) -> TaskHandle { + type DispatchError = Infallible; + + async fn dispatch(&self, into_task: impl IntoTask) -> Result, Infallible> { self.dispatch_boxed(into_task.into_task()).await } #[allow(clippy::missing_panics_doc)] - async fn dispatch_boxed(&self, task: Box>) -> TaskHandle { + async fn dispatch_boxed(&self, task: Box>) -> Result, Infallible> { let worker_id = self .last_worker_id .fetch_update(Ordering::Release, Ordering::Acquire, |last_worker_id| { @@ -494,13 +636,13 @@ impl Dispatcher for BaseDispatcher { self.idle_workers[worker_id].store(false, Ordering::Relaxed); - handle + Ok(handle) } async fn dispatch_many_boxed( &self, into_tasks: impl IntoIterator>> + Send, - ) -> Vec> { + ) -> Result>, Infallible> { let (handles, workers_ids_set) = into_tasks .into_iter() .zip((0..self.workers.len()).cycle()) @@ -517,7 +659,7 @@ impl Dispatcher for BaseDispatcher { self.idle_workers[worker_id].store(false, Ordering::Relaxed); } - handles + Ok(handles) } } diff --git a/crates/task-system/src/task.rs b/crates/task-system/src/task.rs index 68028fc0e791..208968a9c1f1 100644 --- a/crates/task-system/src/task.rs +++ b/crates/task-system/src/task.rs @@ -1,9 +1,9 @@ use std::{ fmt, future::{Future, IntoFuture}, - pin::Pin, + pin::{pin, Pin}, sync::{ - atomic::{AtomicBool, AtomicU8, 
Ordering}, + atomic::{AtomicBool, Ordering}, Arc, }, task::{Context, Poll}, @@ -13,8 +13,9 @@ use std::{ use async_channel as chan; use async_trait::async_trait; use downcast_rs::{impl_downcast, Downcast}; +use futures::StreamExt; use tokio::{spawn, sync::oneshot}; -use tracing::{error, trace, warn}; +use tracing::{error, instrument, trace, warn, Instrument}; use uuid::Uuid; use super::{ @@ -193,7 +194,6 @@ where pub struct InterrupterFuture<'recv> { #[pin] fut: chan::Recv<'recv, InterruptionRequest>, - has_interrupted: &'recv AtomicU8, } impl Future for InterrupterFuture<'_> { @@ -204,6 +204,7 @@ impl Future for InterrupterFuture<'_> { match this.fut.poll(cx) { Poll::Ready(Ok(InterruptionRequest { kind, ack })) => { + trace!(?kind, "Running task received interruption request"); if ack.send(()).is_err() { warn!("TaskInterrupter ack channel closed"); } @@ -213,7 +214,6 @@ impl Future for InterrupterFuture<'_> { let kind = kind.into(); - this.has_interrupted.store(kind as u8, Ordering::Relaxed); Poll::Ready(kind) } Poll::Ready(Err(chan::RecvError)) => { @@ -237,7 +237,6 @@ impl<'recv> IntoFuture for &'recv Interrupter { fn into_future(self) -> Self::IntoFuture { InterrupterFuture { fut: self.interrupt_rx.recv(), - has_interrupted: &self.has_interrupted, } } } @@ -247,53 +246,68 @@ impl<'recv> IntoFuture for &'recv Interrupter { #[derive(Debug)] pub struct Interrupter { interrupt_rx: chan::Receiver, - has_interrupted: AtomicU8, +} + +impl Drop for Interrupter { + fn drop(&mut self) { + if !self.interrupt_rx.is_closed() { + self.close(); + } + } } impl Interrupter { pub(crate) fn new(interrupt_tx: chan::Receiver) -> Self { Self { interrupt_rx: interrupt_tx, - has_interrupted: AtomicU8::new(0), } } /// Check if the user requested a pause or a cancel, returning the kind of interruption that was requested /// in a non-blocking manner. 
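+	///
+	/// Illustrative sketch only (not a compiled doctest); `interrupter` stands for the
+	/// `&Interrupter` handed to a task's `run` method and `ExecStatus` for the status
+	/// type the task returns, both assumed here for the example:
+	///
+	/// ```ignore
+	/// // Poll for interruptions between units of work instead of awaiting the interrupter.
+	/// if let Some(kind) = interrupter.try_check_interrupt() {
+	///     return Ok(match kind {
+	///         InterruptionKind::Pause => ExecStatus::Paused,
+	///         InterruptionKind::Cancel => ExecStatus::Canceled,
+	///     });
+	/// }
+	/// ```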
pub fn try_check_interrupt(&self) -> Option { - InterruptionKind::load(&self.has_interrupted).map_or_else( - || { - if let Ok(InterruptionRequest { kind, ack }) = self.interrupt_rx.try_recv() { - if ack.send(()).is_err() { - warn!("TaskInterrupter ack channel closed"); - } + if let Ok(InterruptionRequest { kind, ack }) = self.interrupt_rx.try_recv() { + trace!(?kind, "Interrupter received interruption request"); - if let InternalInterruptionKind::Suspend(has_suspended) = &kind { - has_suspended.store(true, Ordering::SeqCst); - } + if let InternalInterruptionKind::Suspend(has_suspended) = &kind { + has_suspended.store(true, Ordering::SeqCst); + } - let kind = kind.into(); + let kind = kind.into(); - self.has_interrupted.store(kind as u8, Ordering::Relaxed); + if ack.send(()).is_err() { + warn!("TaskInterrupter ack channel closed"); + } - Some(kind) - } else { - None - } - }, - Some, - ) + Some(kind) + } else { + None + } } - pub(super) fn reset(&self) { - self.has_interrupted - .compare_exchange( - InterruptionKind::Pause as u8, - 0, - Ordering::Release, - Ordering::Relaxed, - ) - .expect("we must only reset paused tasks"); + pub(super) fn close(&self) { + self.interrupt_rx.close(); + if !self.interrupt_rx.is_empty() { + trace!("Pending interruption requests were not handled"); + spawn({ + let interrupt_rx = self.interrupt_rx.clone(); + + async move { + let mut interrupt_stream = pin!(interrupt_rx); + + while let Some(InterruptionRequest { kind, ack }) = + interrupt_stream.next().await + { + trace!( + ?kind, + "Interrupter received interruption request after task was completed" + ); + ack.send(()).expect("Interrupter ack channel closed"); + } + } + .in_current_span() + }); + } } } @@ -340,20 +354,9 @@ macro_rules! check_interruption { /// The kind of interruption that can be requested by the user, a pause or a cancel #[derive(Debug, Clone, Copy)] -#[repr(u8)] pub enum InterruptionKind { - Pause = 1, - Cancel = 2, -} - -impl InterruptionKind { - fn load(kind: &AtomicU8) -> Option { - match kind.load(Ordering::Relaxed) { - 1 => Some(Self::Pause), - 2 => Some(Self::Cancel), - _ => None, - } - } + Pause, + Cancel, } #[derive(Debug, Clone)] @@ -398,31 +401,37 @@ impl TaskRemoteController { /// # Panics /// /// Will panic if the worker failed to ack the pause request + #[instrument(skip(self), fields(task_id = %self.task_id), err)] pub async fn pause(&self) -> Result<(), SystemError> { - let is_paused = self.worktable.is_paused.load(Ordering::Relaxed); - let is_canceled = self.worktable.has_canceled.load(Ordering::Relaxed); - let is_done = self.worktable.is_done.load(Ordering::Relaxed); + if self.worktable.is_finalized() { + trace!("Task is finalized, will not pause"); + return Ok(()); + } + + let is_paused = self.worktable.is_paused.load(Ordering::Acquire); + let is_canceled = self.worktable.has_canceled.load(Ordering::Acquire); + let is_done = self.worktable.is_done.load(Ordering::Acquire); - trace!("Received pause command task: "); + trace!(%is_canceled, %is_done, "Received pause command task"); if !is_paused && !is_canceled && !is_done { - if self.worktable.is_running.load(Ordering::Relaxed) { + if self.worktable.is_running.load(Ordering::Acquire) { let (tx, rx) = oneshot::channel(); trace!("Task is running, sending pause request"); - self.worktable.pause(tx).await; + self.worktable.pause(tx); rx.await.expect("Worker failed to ack pause request"); } else { - trace!("Task is not running, setting is_paused flag"); - self.worktable.is_paused.store(true, Ordering::Relaxed); + trace!("Task is not 
running, setting is_paused flag and communicating with system"); + self.worktable.is_paused.store(true, Ordering::Release); let (tx, rx) = oneshot::channel(); self.system_comm.pause_not_running_task( self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), + Arc::clone(&self.worktable), tx, ); @@ -440,55 +449,68 @@ impl TaskRemoteController { /// # Panics /// /// Will panic if the worker failed to ack the cancel request - pub async fn cancel(&self) { - let is_canceled = self.worktable.has_canceled.load(Ordering::Relaxed); - let is_done = self.worktable.is_done.load(Ordering::Relaxed); + #[instrument(skip(self), fields(task_id = %self.task_id))] + pub async fn cancel(&self) -> Result<(), SystemError> { + if self.worktable.is_finalized() { + trace!("Task is finalized, will not cancel"); + return Ok(()); + } - trace!( - "Received cancel command task: ", - self.task_id - ); + let is_canceled = self.worktable.has_canceled(); + let is_done = self.worktable.is_done(); + + trace!(%is_canceled, %is_done, "Received cancel command task"); if !is_canceled && !is_done { - if self.worktable.is_running.load(Ordering::Relaxed) { + if self.worktable.is_running() { let (tx, rx) = oneshot::channel(); trace!("Task is running, sending cancel request"); - self.worktable.cancel(tx).await; + self.worktable.cancel(tx); rx.await.expect("Worker failed to ack cancel request"); } else { - trace!("Task is not running, setting is_canceled flag"); - self.worktable.has_canceled.store(true, Ordering::Relaxed); + trace!( + "Task is not running, setting is_canceled flag and communicating with system" + ); + self.worktable.has_canceled.store(true, Ordering::Release); let (tx, rx) = oneshot::channel(); self.system_comm.cancel_not_running_task( self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), + Arc::clone(&self.worktable), tx, ); - rx.await + return rx + .await .expect("Worker failed to ack cancel not running task request"); } } + + Ok(()) } /// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution. + /// /// # Panics + /// /// Will panic if the worker failed to ack the forced abortion request + #[instrument(skip(self), fields(task_id = %self.task_id), err)] pub async fn force_abortion(&self) -> Result<(), SystemError> { + if self.worktable.is_finalized() { + trace!("Task is finalized, will not force abortion"); + return Ok(()); + } + trace!("Received force abortion command task"); self.worktable.set_aborted(); let (tx, rx) = oneshot::channel(); - self.system_comm.force_abortion( - self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), - tx, - ); + self.system_comm + .force_abortion(self.task_id, Arc::clone(&self.worktable), tx); rx.await .expect("Worker failed to ack force abortion request") @@ -496,16 +518,22 @@ impl TaskRemoteController { /// Marks the task to be resumed by the task system, the worker will start processing it if there is a slot /// available or will be enqueued otherwise. 
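+	///
+	/// The request is routed through the system using the task's worktable rather than a
+	/// fixed worker id, since the task may have been stolen and moved to another worker
+	/// after it was dispatched.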
+ /// /// # Panics + /// /// Will panic if the worker failed to ack the resume request + #[instrument(skip(self), fields(task_id = %self.task_id), err)] pub async fn resume(&self) -> Result<(), SystemError> { + if self.worktable.is_finalized() { + trace!("Task is finalized, will not resume"); + return Ok(()); + } + trace!("Received resume command task"); + let (tx, rx) = oneshot::channel(); - self.system_comm.resume_task( - self.task_id, - self.worktable.current_worker_id.load(Ordering::Relaxed), - tx, - ); + self.system_comm + .resume_task(self.task_id, Arc::clone(&self.worktable), tx); rx.await.expect("Worker failed to ack resume request") } @@ -547,21 +575,13 @@ impl TaskHandle { } /// Gracefully pause the task at a safe point defined by the user using the [`Interrupter`] - /// - /// # Panics - /// - /// Will panic if the worker failed to ack the pause request pub async fn pause(&self) -> Result<(), SystemError> { self.controller.pause().await } /// Gracefully cancel the task at a safe point defined by the user using the [`Interrupter`] - /// - /// # Panics - /// - /// Will panic if the worker failed to ack the cancel request - pub async fn cancel(&self) { - self.controller.cancel().await; + pub async fn cancel(&self) -> Result<(), SystemError> { + self.controller.cancel().await } /// Forcefully abort the task, this can lead to corrupted data or inconsistent states, so use it with caution. @@ -617,7 +637,7 @@ impl Drop for CancelTaskOnDrop { fn drop(&mut self) { // FIXME: We should use async drop when it becomes stable if let Some(handle) = self.0.take() { - spawn(async move { handle.cancel().await }); + spawn(async move { handle.cancel().await }.in_current_span()); } } } @@ -633,6 +653,7 @@ pub struct TaskWorktable { has_shutdown: AtomicBool, has_failed: AtomicBool, interrupt_tx: chan::Sender, + finalized: AtomicBool, current_worker_id: AtomicWorkerId, } @@ -647,11 +668,23 @@ impl TaskWorktable { has_aborted: AtomicBool::new(false), has_shutdown: AtomicBool::new(false), has_failed: AtomicBool::new(false), + finalized: AtomicBool::new(false), interrupt_tx, current_worker_id: AtomicWorkerId::new(worker_id), } } + #[inline] + pub fn worker_id(&self) -> WorkerId { + self.current_worker_id.load(Ordering::Acquire) + } + + #[inline] + pub fn change_worker(&self, new_worker_id: WorkerId) { + self.current_worker_id + .store(new_worker_id, Ordering::Release); + } + pub fn set_started(&self) { self.started.store(true, Ordering::Relaxed); self.is_running.store(true, Ordering::Relaxed); @@ -686,71 +719,139 @@ impl TaskWorktable { self.is_running.store(false, Ordering::Relaxed); } - pub async fn pause(&self, tx: oneshot::Sender<()>) { - self.is_paused.store(true, Ordering::Relaxed); - self.is_running.store(false, Ordering::Relaxed); + pub fn set_finalized(&self) { + self.finalized.store(true, Ordering::Release); + } + + pub fn pause(self: &Arc, outer_tx: oneshot::Sender<()>) { + spawn({ + let this = Arc::clone(self); - trace!("Sending pause signal to Interrupter object on task"); + trace!("Sending pause signal to Interrupter object on task"); - self.interrupt_tx - .send(InterruptionRequest { - kind: InternalInterruptionKind::Pause, - ack: tx, - }) - .await - .expect("Worker channel closed trying to pause task"); + async move { + let (tx, rx) = oneshot::channel(); + + if this + .interrupt_tx + .send(InterruptionRequest { + kind: InternalInterruptionKind::Pause, + ack: tx, + }) + .await + .is_ok() + { + rx.await.expect("Task failed to ack pause request"); + + this.is_paused.store(true, 
Ordering::Release); + this.is_running.store(false, Ordering::Release); + } + + trace!("Sent pause signal to Interrupter object on task"); + + outer_tx + .send(()) + .expect("Worker channel closed trying to pause task"); + } + .in_current_span() + }); } - pub async fn suspend(&self, tx: oneshot::Sender<()>, has_suspended: Arc) { - self.is_paused.store(true, Ordering::Relaxed); - self.is_running.store(false, Ordering::Relaxed); + pub fn suspend( + self: &Arc, + outer_tx: oneshot::Sender<()>, + has_suspended: Arc, + ) { + trace!("Sending suspend signal to Interrupter object on task"); + spawn({ + let this = Arc::clone(self); + + async move { + let (tx, rx) = oneshot::channel(); - trace!("Sending pause signal to Interrupter object on task"); + if this + .interrupt_tx + .send(InterruptionRequest { + kind: InternalInterruptionKind::Suspend(has_suspended), + ack: tx, + }) + .await + .is_ok() + { + rx.await.expect("Task failed to ack suspend request"); + + this.is_paused.store(true, Ordering::Release); + this.is_running.store(false, Ordering::Release); + } - self.interrupt_tx - .send(InterruptionRequest { - kind: InternalInterruptionKind::Suspend(has_suspended), - ack: tx, - }) - .await - .expect("Interrupter channel closed trying to pause task"); + outer_tx + .send(()) + .expect("Worker channel closed trying to suspend task"); + } + .in_current_span() + }); } - pub async fn cancel(&self, tx: oneshot::Sender<()>) { - self.interrupt_tx - .send(InterruptionRequest { - kind: InternalInterruptionKind::Cancel, - ack: tx, - }) - .await - .expect("Interrupter channel closed trying to pause task"); + pub fn cancel(self: &Arc, outer_tx: oneshot::Sender<()>) { + trace!("Sending cancel signal to Interrupter object on task"); + spawn({ + let this = Arc::clone(self); + async move { + let (tx, rx) = oneshot::channel(); - self.has_canceled.store(true, Ordering::Relaxed); - self.is_running.store(false, Ordering::Relaxed); + if this + .interrupt_tx + .send(InterruptionRequest { + kind: InternalInterruptionKind::Cancel, + ack: tx, + }) + .await + .is_ok() + { + rx.await.expect("Task failed to ack cancel request"); + + this.has_canceled.store(true, Ordering::Release); + this.is_running.store(false, Ordering::Release); + } + + outer_tx + .send(()) + .expect("Worker channel closed trying to cancel task"); + } + .in_current_span() + }); } pub fn is_done(&self) -> bool { - self.is_done.load(Ordering::Relaxed) + self.is_done.load(Ordering::Acquire) + } + + pub fn is_running(&self) -> bool { + self.is_running.load(Ordering::Acquire) } pub fn is_paused(&self) -> bool { - self.is_paused.load(Ordering::Relaxed) + self.is_paused.load(Ordering::Acquire) } pub fn has_canceled(&self) -> bool { - self.has_canceled.load(Ordering::Relaxed) + self.has_canceled.load(Ordering::Acquire) } pub fn has_failed(&self) -> bool { - self.has_failed.load(Ordering::Relaxed) + self.has_failed.load(Ordering::Acquire) } pub fn has_aborted(&self) -> bool { - self.has_aborted.load(Ordering::Relaxed) + self.has_aborted.load(Ordering::Acquire) } pub fn has_shutdown(&self) -> bool { - self.has_shutdown.load(Ordering::Relaxed) + self.has_shutdown.load(Ordering::Acquire) + } + + pub fn is_finalized(&self) -> bool { + self.finalized.load(Ordering::Acquire) } } @@ -788,17 +889,6 @@ impl TaskWorkState { pub fn kind(&self) -> PendingTaskKind { PendingTaskKind::with_priority(self.task.with_priority()) } - - pub fn worker_id(&self) -> WorkerId { - self.worktable.current_worker_id.load(Ordering::Relaxed) - } - - #[inline] - pub fn change_worker(&self, 
new_worker_id: WorkerId) { - self.worktable - .current_worker_id - .store(new_worker_id, Ordering::Relaxed); - } } #[derive(Debug)] @@ -830,10 +920,13 @@ impl PanicOnSenderDrop { } impl Drop for PanicOnSenderDrop { + #[track_caller] fn drop(&mut self) { + trace!(task_id = %self.task_id, "Dropping TaskWorkState"); assert!( self.maybe_done_tx.is_none(), - "TaskHandle done channel dropped before sending a result" + "TaskHandle done channel dropped before sending a result: {}", + std::panic::Location::caller() ); trace!(task_id = %self.task_id, "TaskWorkState successfully dropped" diff --git a/crates/task-system/src/worker/mod.rs b/crates/task-system/src/worker/mod.rs index 2891fdeb0f99..c8b29bc6bc16 100644 --- a/crates/task-system/src/worker/mod.rs +++ b/crates/task-system/src/worker/mod.rs @@ -6,7 +6,7 @@ use std::{ use async_channel as chan; use tokio::{spawn, sync::oneshot, task::JoinHandle}; -use tracing::{error, info, instrument, trace, warn}; +use tracing::{error, info, instrument, trace, warn, Instrument}; use super::{ error::{RunError, SystemError}, @@ -87,6 +87,7 @@ impl WorkerBuilder { info!("Worker gracefully shutdown"); } + .in_current_span() }); Worker { @@ -158,7 +159,11 @@ impl Worker { .expect("Worker channel closed trying to pause a not running task"); } - pub async fn cancel_not_running_task(&self, task_id: TaskId, ack: oneshot::Sender<()>) { + pub async fn cancel_not_running_task( + &self, + task_id: TaskId, + ack: oneshot::Sender>, + ) { self.msgs_tx .send(WorkerMessage::CancelNotRunningTask { task_id, ack }) .await @@ -217,12 +222,14 @@ pub struct WorkerComm { impl WorkerComm { pub async fn steal_task( &self, + stealer_id: WorkerId, stolen_task_tx: chan::Sender>>, ) -> bool { let (tx, rx) = oneshot::channel(); self.msgs_tx .send(WorkerMessage::StealRequest { + stealer_id, ack: tx, stolen_task_tx, }) @@ -256,7 +263,7 @@ impl WorkStealer { #[instrument(skip(self, stolen_task_tx))] pub async fn steal( &self, - stealer_worker_id: WorkerId, + stealer_id: WorkerId, stolen_task_tx: &chan::Sender>>, ) { let total_workers = self.worker_comms.len(); @@ -267,26 +274,21 @@ impl WorkStealer { // Cycling over the workers .cycle() // Starting from the next worker id - .skip(stealer_worker_id) + .skip(stealer_id) // Taking the total amount of workers .take(total_workers) // Removing the current worker as we can't steal from ourselves - .filter(|worker_comm| worker_comm.worker_id != stealer_worker_id) + .filter(|worker_comm| worker_comm.worker_id != stealer_id) { - trace!(stolen_worker_id = worker_comm.worker_id, "Trying to steal",); - - if worker_comm.steal_task(stolen_task_tx.clone()).await { + if worker_comm + .steal_task(stealer_id, stolen_task_tx.clone()) + .await + { trace!(stolen_worker_id = worker_comm.worker_id, "Stole a task"); return; } - - trace!( - stolen_worker_id = worker_comm.worker_id, - "No tasks to steal" - ); } - trace!("No workers have tasks to steal"); stolen_task_tx .send(None) .await diff --git a/crates/task-system/src/worker/run.rs b/crates/task-system/src/worker/run.rs index bff430e1d081..9b2ed3c46d94 100644 --- a/crates/task-system/src/worker/run.rs +++ b/crates/task-system/src/worker/run.rs @@ -68,15 +68,12 @@ pub(super) async fn run( if ack.send(runner.pause_not_running_task(task_id)).is_err() { warn!("Resume task channel closed before sending ack"); } - trace!(%task_id, "Paused not running task response sent"); } StreamMessage::Commands(WorkerMessage::CancelNotRunningTask { task_id, ack }) => { - runner.cancel_not_running_task(&task_id); - if 
ack.send(()).is_err() { + if ack.send(runner.cancel_not_running_task(&task_id)).is_err() { warn!("Resume task channel closed before sending ack"); } - trace!(%task_id, "Cancel not running task response sent"); } StreamMessage::Commands(WorkerMessage::ForceAbortion { task_id, ack }) => { @@ -95,24 +92,21 @@ pub(super) async fn run( } StreamMessage::Commands(WorkerMessage::StealRequest { + stealer_id, ack, stolen_task_tx, }) => { - trace!("Steal task request received"); if ack - .send(runner.steal_request(stolen_task_tx).await) + .send(runner.steal_request(stealer_id, stolen_task_tx).await) .is_err() { debug!("Steal request attempt aborted before sending ack"); } - trace!("Steal task request completed"); } // Runner messages StreamMessage::TaskOutput(TaskOutputMessage(task_id, Ok(output))) => { - trace!(%task_id, "Process task output request received"); runner.process_task_output(&task_id, output).await; - trace!(%task_id, "Processed task output"); } StreamMessage::TaskOutput(TaskOutputMessage(task_id, Err(()))) => { @@ -123,14 +117,7 @@ pub(super) async fn run( } StreamMessage::Steal(maybe_stolen_task) => { - trace!( - maybe_task_id = ?maybe_stolen_task - .as_ref() - .map(|StoleTaskMessage(task_work_state)| task_work_state.id()), - "Received stolen task request" - ); runner.process_stolen_task(maybe_stolen_task).await; - trace!("Processed stolen task"); } // Idle checking to steal some work diff --git a/crates/task-system/src/worker/runner.rs b/crates/task-system/src/worker/runner.rs index c471985da662..d99981558c90 100644 --- a/crates/task-system/src/worker/runner.rs +++ b/crates/task-system/src/worker/runner.rs @@ -18,7 +18,7 @@ use tokio::{ task::{JoinError, JoinHandle}, time::{sleep, timeout, Instant}, }; -use tracing::{debug, error, instrument, trace, warn}; +use tracing::{debug, error, instrument, trace, warn, Instrument}; use super::{ super::{ @@ -162,12 +162,15 @@ impl Runner { }, ); - let handle = spawn(run_single_task( - task_work_state, - self.task_output_tx.clone(), - suspend_rx, - abort_rx, - )); + let handle = spawn( + run_single_task( + task_work_state, + self.task_output_tx.clone(), + suspend_rx, + abort_rx, + ) + .in_current_span(), + ); trace!("Task runner spawned"); @@ -223,8 +226,6 @@ impl Runner { #[instrument(skip(self))] pub(super) fn pause_not_running_task(&mut self, task_id: TaskId) -> Result<(), SystemError> { - trace!("Pause not running task request"); - if self.paused_tasks.contains_key(&task_id) { trace!("Task is already paused"); return Ok(()); @@ -298,7 +299,7 @@ impl Runner { } #[instrument(skip(self))] - pub(super) fn cancel_not_running_task(&mut self, task_id: &TaskId) { + pub(super) fn cancel_not_running_task(&mut self, task_id: &TaskId) -> Result<(), SystemError> { trace!("Cancel not running task request"); if let Some(current_task) = &self.current_task_handle { @@ -306,7 +307,7 @@ impl Runner { trace!( "Task began to run before we managed to cancel it, run function will cancel it" ); - return; // The task will cancel itself + return Ok(()); // The task will cancel itself } } @@ -320,18 +321,22 @@ impl Runner { send_cancel_task_response(self.suspended_task.take().expect("we just checked it")); - return; + return Ok(()); } } - self.cancel_task_from_queues(task_id); + if self.cancel_task_from_queues(task_id) { + return Ok(()); + } + + Err(SystemError::TaskNotFound(*task_id)) // If the task is not found, then it's possible that the user already canceled it but still have the handle } #[instrument(skip(self))] #[inline] - fn cancel_task_from_queues(&mut self, 
task_id: &TaskId) { + fn cancel_task_from_queues(&mut self, task_id: &TaskId) -> bool { if let Some(index) = self .priority_tasks .iter() @@ -343,7 +348,7 @@ impl Runner { .expect("we just checked it"), ); - return; + return true; } if let Some(index) = self @@ -352,7 +357,17 @@ impl Runner { .position(|task_work_state| task_work_state.id() == *task_id) { send_cancel_task_response(self.tasks.remove(index).expect("we just checked it")); + + return true; } + + if let Some(task_work_state) = self.paused_tasks.remove(task_id) { + send_cancel_task_response(task_work_state); + + return true; + } + + false } #[instrument(skip(self, task_work_state))] @@ -553,7 +568,6 @@ impl Runner { self.abort_steal_task(); let Self { - worker_id, tasks, suspended_task, paused_tasks, @@ -618,7 +632,6 @@ impl Runner { task_output_tx.close(); Self::process_tasks_being_suspended_on_shutdown( - worker_id, suspend_on_shutdown_stole_task_rx, suspend_on_shutdown_task_output_rx, ) @@ -641,7 +654,6 @@ impl Runner { } async fn process_tasks_being_suspended_on_shutdown( - worker_id: WorkerId, suspend_on_shutdown_stole_task_rx: chan::Receiver>>, suspend_on_shutdown_task_output_rx: chan::Receiver>, ) { @@ -686,11 +698,9 @@ impl Runner { StreamMessage::Steal(Some(StoleTaskMessage(task_work_state))) => { trace!( - stolen_worker_id = %task_work_state.worker_id(), task_id = %task_work_state.id(), "Stole task", ); - task_work_state.change_worker(worker_id); send_shutdown_task_response(task_work_state); } @@ -706,7 +716,6 @@ impl Runner { } if let Some(task) = self.suspended_task.take() { - task.interrupter.reset(); task.worktable.set_unpause(); return Some((PendingTaskKind::Suspended, task)); } @@ -716,23 +725,47 @@ impl Runner { .map(|task| (PendingTaskKind::Normal, task)) } - #[instrument(skip(self))] + #[instrument(skip_all)] pub(super) async fn steal_request( &mut self, + stealer_id: WorkerId, stolen_task_tx: chan::Sender>>, ) -> bool { - trace!("Steal request"); - if let Some((kind, task_work_state)) = self.get_next_task() { + while let Some((kind, task_work_state)) = self.get_next_task() { let task_id = task_work_state.id(); self.task_kinds.remove(&task_id); trace!(%task_id, ?kind, "Task being stolen"); + if task_work_state.worktable.has_canceled() { + trace!(%task_id, "Task was canceled before we could steal it"); + send_cancel_task_response(task_work_state); + continue; + } + + if task_work_state.worktable.has_aborted() { + trace!(%task_id, "Task was force aborted before we could steal it"); + send_forced_abortion_task_response(task_work_state); + continue; + } + + if task_work_state.worktable.is_paused() { + trace!(%task_id, "Task was paused before we could steal it"); + self.task_kinds.insert(task_id, kind); + self.paused_tasks.insert(task_id, task_work_state); + continue; + } + + trace!(%task_id, ?kind, "Task being stolen"); + + task_work_state.worktable.change_worker(stealer_id); + if let Err(chan::SendError(Some(StoleTaskMessage(task_work_state)))) = stolen_task_tx .send(Some(StoleTaskMessage(task_work_state))) .await { warn!("Steal request channel closed before sending task"); + task_work_state.worktable.change_worker(self.worker_id); match kind { PendingTaskKind::Normal => self.tasks.push_front(task_work_state), PendingTaskKind::Priority => self.priority_tasks.push_front(task_work_state), @@ -747,21 +780,18 @@ impl Runner { self.task_kinds.insert(task_id, kind); - false - } else { - true + return false; } - } else { - trace!("No task to steal"); - false + + return true; // Successfully stole the task } + + false // 
No task to steal } #[instrument(skip(self))] #[inline] pub(super) async fn dispatch_next_task(&mut self, finished_task_id: &TaskId) { - trace!("Task finished and will try to process a new task"); - self.abort_and_suspend_map.remove(finished_task_id); let RunningTask { @@ -776,11 +806,9 @@ impl Runner { assert_eq!(*finished_task_id, old_task_id, "Task output id mismatch"); // Sanity check - trace!("Waiting task handle",); if let Err(e) = handle.await { error!(?e, "Task failed to join"); } - trace!("Waited task handle"); if let Some((next_task_kind, task_work_state)) = self.get_next_task() { let next_task_id = task_work_state.id(); @@ -795,21 +823,15 @@ impl Runner { handle, }); } else { - trace!("No task to dispatch, worker is now idle and will dispatch a steal request"); - self.is_idle = true; self.system_comm.idle_report(self.worker_id); - trace!("Worker reported idle status"); - if self.current_steal_task_handle.is_none() { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, self.work_stealer.clone(), self.stole_task_tx.clone(), )); - } else { - trace!("Steal task already running"); } } } @@ -856,20 +878,14 @@ impl Runner { } } - trace!("Processing task output completed and will try to dispatch a new task"); - self.dispatch_next_task(task_id).await; } #[instrument(skip(self))] pub(super) fn idle_check(&mut self) { if self.is_idle { - trace!("Worker is idle for some time and will try to steal a task"); - if self.current_steal_task_handle.is_none() { self.steal_attempt(); - } else { - trace!("Steal task already running, ignoring on this idle check"); } self.idle_memory_cleanup(); @@ -881,8 +897,6 @@ impl Runner { let elapsed = self.last_steal_attempt_at.elapsed(); let required = (TEN_SECONDS * self.steal_attempts_count).min(ONE_MINUTE); - trace!(?elapsed, ?required, "Steal attempt required cool down"); - if elapsed > required { self.current_steal_task_handle = Some(dispatch_steal_request( self.worker_id, @@ -890,8 +904,6 @@ impl Runner { self.stole_task_tx.clone(), )); self.last_steal_attempt_at = Instant::now(); - } else { - trace!("Steal attempt still cooling down"); } } @@ -933,8 +945,6 @@ impl Runner { if let Some(steal_task_handle) = self.current_steal_task_handle.take() { steal_task_handle.abort(); trace!("Aborted steal task"); - } else { - trace!("No steal task to abort"); } } @@ -1055,6 +1065,7 @@ fn handle_run_task_attempt( } } } + .in_current_span() }) } @@ -1063,24 +1074,27 @@ fn handle_task_suspension( worktable: Arc, suspend_rx: oneshot::Receiver<()>, ) -> JoinHandle<()> { - spawn(async move { - if suspend_rx.await.is_ok() { - let (tx, rx) = oneshot::channel(); + spawn( + async move { + if suspend_rx.await.is_ok() { + let (tx, rx) = oneshot::channel(); - trace!("Suspend signal received"); + trace!("Suspend signal received"); - worktable.suspend(tx, has_suspended).await; + worktable.suspend(tx, has_suspended); - if rx.await.is_ok() { - trace!("Suspending"); + if rx.await.is_ok() { + trace!("Suspending"); + } else { + // The task probably finished before we could suspend it so the channel was dropped + trace!("Suspend channel closed"); + } } else { - // The task probably finished before we could suspend it so the channel was dropped - trace!("Suspend channel closed"); + trace!("Suspend channel closed, task probably finished before we could suspend it"); } - } else { - trace!("Suspend channel closed, task probably finished before we could suspend it"); } - }) + .in_current_span(), + ) } type PartialTaskWorkState = ( @@ -1105,10 +1119,21 @@ async fn 
emit_task_completed_message( let mut internal_status = res.into(); let suspended = has_suspended.load(Ordering::SeqCst); - if matches!(internal_status, InternalTaskExecStatus::Paused) && suspended { - internal_status = InternalTaskExecStatus::Suspend; - } else if matches!(internal_status, InternalTaskExecStatus::Paused) { - debug!(?internal_status, "Task completed with status"); + match internal_status { + InternalTaskExecStatus::Paused if suspended => { + internal_status = InternalTaskExecStatus::Suspend; + } + + InternalTaskExecStatus::Paused | InternalTaskExecStatus::Suspend => { + /* Nothing to do */ + } + + InternalTaskExecStatus::Done(_) + | InternalTaskExecStatus::Canceled + | InternalTaskExecStatus::Error(_) => { + trace!(?internal_status, "Task completed, closing interrupter"); + interrupter.close(); + } } Ok(TaskRunnerOutput { @@ -1207,6 +1232,7 @@ async fn run_single_task( } RaceOutput::Completed(Err(join_error)) => { + interrupter.close(); error!(?join_error, "Task failed to join"); if done_tx.send(Err(SystemError::TaskJoin(task_id))).is_err() { error!("Task done channel closed while sending join error response"); @@ -1249,12 +1275,9 @@ async fn run_single_task( } if !suspender_handle.is_finished() { - trace!("Aborting suspender handler as it isn't needed anymore"); // if we received a suspend signal this abort will do nothing, as the task finished already suspender_handle.abort(); } - - trace!("Run single task finished"); } #[instrument(skip(task, done_tx, worktable, out), fields(task_id = %task.id()))] @@ -1268,13 +1291,14 @@ fn send_complete_task_response( out: TaskOutput, ) { worktable.set_completed(); + worktable.set_finalized(); if done_tx .send(Ok(TaskStatus::Done((task.id(), out)))) .is_err() { warn!("Task done channel closed before sending done response for task"); } else { - trace!("Emitted task done signal on shutdown"); + trace!("Emitted task done signal on task completion"); } } @@ -1288,10 +1312,11 @@ fn send_cancel_task_response( }: TaskWorkState, ) { worktable.set_canceled(); + worktable.set_finalized(); if done_tx.send(Ok(TaskStatus::Canceled)).is_err() { warn!("Task done channel closed before sending canceled response for task"); } else { - trace!("Emitted task canceled signal on cancel not running task"); + trace!("Emitted task canceled signal on cancel request"); } } @@ -1305,6 +1330,7 @@ fn send_shutdown_task_response( }: TaskWorkState, ) { worktable.set_shutdown(); + worktable.set_finalized(); if done_tx.send(Ok(TaskStatus::Shutdown(task))).is_err() { warn!("Task done channel closed before sending shutdown response for task"); } else { @@ -1323,10 +1349,11 @@ fn send_error_task_response( e: E, ) { worktable.set_completed(); + worktable.set_finalized(); if done_tx.send(Ok(TaskStatus::Error(e))).is_err() { warn!("Task done channel closed before sending error response for task"); } else { - trace!("Emitted task error signal on shutdown"); + trace!("Emitted task error signal"); } } @@ -1340,10 +1367,11 @@ fn send_forced_abortion_task_response( }: TaskWorkState, ) { worktable.set_aborted(); + worktable.set_finalized(); if done_tx.send(Ok(TaskStatus::ForcedAbortion)).is_err() { warn!("Task done channel closed before sending forced abortion response for task"); } else { - trace!("Emitted task forced abortion signal on cancel not running task"); + trace!("Emitted task forced abortion signal"); } } @@ -1352,5 +1380,5 @@ fn dispatch_steal_request( work_stealer: WorkStealer, stole_task_tx: chan::Sender>>, ) -> JoinHandle<()> { - spawn(async move { 
work_stealer.steal(worker_id, &stole_task_tx).await }) + spawn(async move { work_stealer.steal(worker_id, &stole_task_tx).await }.in_current_span()) } diff --git a/crates/task-system/tests/common/actors.rs b/crates/task-system/tests/common/actors.rs index 37bcbcefcc8a..f05f846bebf5 100644 --- a/crates/task-system/tests/common/actors.rs +++ b/crates/task-system/tests/common/actors.rs @@ -83,6 +83,7 @@ impl SampleActor { paused_count, )) .await + .expect("infallible") } else { task_dispatcher .dispatch(SampleActorTask::with_id( @@ -92,6 +93,7 @@ impl SampleActor { paused_count, )) .await + .expect("infallible") }) .await .expect("Task handle receiver dropped"); @@ -121,8 +123,12 @@ impl SampleActor { self.task_dispatcher .dispatch(self.new_priority_task(duration)) .await + .expect("infallible") } else { - self.task_dispatcher.dispatch(self.new_task(duration)).await + self.task_dispatcher + .dispatch(self.new_task(duration)) + .await + .expect("infallible") }) .await .expect("Task handle receiver dropped"); diff --git a/crates/task-system/tests/common/jobs.rs b/crates/task-system/tests/common/jobs.rs index 7055c347304b..61c7c507b71b 100644 --- a/crates/task-system/tests/common/jobs.rs +++ b/crates/task-system/tests/common/jobs.rs @@ -41,6 +41,7 @@ impl SampleJob { task_dispatcher .dispatch_many(initial_steps) .await + .expect("infallible") .into_iter(), ) .lend_mut(); @@ -108,7 +109,8 @@ impl Task for SampleJobTask { expected_children: self.expected_children - 1, task_dispatcher: self.task_dispatcher.clone(), }) - .await, + .await + .expect("infallible"), } .into_output(), )) diff --git a/crates/task-system/tests/common/tasks.rs b/crates/task-system/tests/common/tasks.rs index 3d556ee07c5c..6f27d76389cd 100644 --- a/crates/task-system/tests/common/tasks.rs +++ b/crates/task-system/tests/common/tasks.rs @@ -1,17 +1,22 @@ -use std::{future::pending, time::Duration}; +use std::{ + future::{pending, IntoFuture}, + time::Duration, +}; use sd_task_system::{ ExecStatus, Interrupter, InterruptionKind, IntoAnyTaskOutput, Task, TaskId, TaskOutput, }; +use async_channel as chan; use async_trait::async_trait; +use futures::FutureExt; use futures_concurrency::future::Race; use thiserror::Error; use tokio::{ sync::oneshot, time::{sleep, Instant}, }; -use tracing::{error, info}; +use tracing::{error, info, instrument}; #[derive(Debug, Error)] pub enum SampleError { @@ -215,6 +220,7 @@ impl Task for PauseOnceTask { self.id } + #[instrument(skip(self, interrupter), fields(task_id = %self.id))] async fn run(&mut self, interrupter: &Interrupter) -> Result { if let Some(began_tx) = self.began_tx.take() { if began_tx.send(()).is_err() { @@ -224,6 +230,7 @@ impl Task for PauseOnceTask { if !self.has_paused { self.has_paused = true; + info!("waiting for pause"); match interrupter.await { InterruptionKind::Pause => { info!("Pausing PauseOnceTask ", self.id); @@ -276,3 +283,59 @@ impl Task for BrokenTask { pending().await } } + +#[derive(Debug)] +pub struct WaitSignalTask { + id: TaskId, + signal_rx: chan::Receiver<()>, +} + +impl WaitSignalTask { + pub fn new() -> (Self, chan::Sender<()>) { + let (signal_tx, signal_rx) = chan::bounded(1); + ( + Self { + id: TaskId::new_v4(), + signal_rx, + }, + signal_tx, + ) + } +} + +#[async_trait] +impl Task for WaitSignalTask { + fn id(&self) -> TaskId { + self.id + } + + #[instrument(skip(self, interrupter), fields(task_id = %self.id))] + async fn run(&mut self, interrupter: &Interrupter) -> Result { + enum RaceOutput { + Signal, + Interrupt(InterruptionKind), + } + + let race 
= ( + self.signal_rx.recv().map(|res| { + res.unwrap(); + RaceOutput::Signal + }), + interrupter.into_future().map(RaceOutput::Interrupt), + ); + + match race.race().await { + RaceOutput::Signal => Ok(ExecStatus::Done(TaskOutput::Empty)), + RaceOutput::Interrupt(kind) => match kind { + InterruptionKind::Pause => { + info!("Paused"); + Ok(ExecStatus::Paused) + } + InterruptionKind::Cancel => { + info!("Canceled"); + Ok(ExecStatus::Canceled) + } + }, + } + } +} diff --git a/crates/task-system/tests/integration_test.rs b/crates/task-system/tests/integration_test.rs index db563754d893..2a0ba4fcd1eb 100644 --- a/crates/task-system/tests/integration_test.rs +++ b/crates/task-system/tests/integration_test.rs @@ -1,4 +1,4 @@ -use sd_task_system::{TaskOutput, TaskStatus, TaskSystem}; +use sd_task_system::{TaskHandle, TaskOutput, TaskStatus, TaskSystem}; use std::{collections::VecDeque, time::Duration}; @@ -6,13 +6,16 @@ use futures_concurrency::future::Join; use rand::Rng; use tempfile::tempdir; use tracing::info; +use tracing_subscriber::EnvFilter; use tracing_test::traced_test; mod common; use common::{ actors::SampleActor, - tasks::{BogusTask, BrokenTask, NeverTask, PauseOnceTask, ReadyTask, SampleError}, + tasks::{ + BogusTask, BrokenTask, NeverTask, PauseOnceTask, ReadyTask, SampleError, WaitSignalTask, + }, }; use crate::common::jobs::SampleJob; @@ -72,7 +75,7 @@ async fn cancel_test() { let handle = system.dispatch(NeverTask::default()).await; info!("issuing cancel"); - handle.cancel().await; + handle.cancel().await.unwrap(); assert!(matches!(handle.await, Ok(TaskStatus::Canceled))); @@ -156,6 +159,83 @@ async fn pause_test() { system.shutdown().await; } +#[test] +fn many_pauses_test() { + std::env::set_var("RUST_LOG", "info,sd_task_system=error"); + + tracing_subscriber::fmt() + .with_file(true) + .with_line_number(true) + .with_env_filter(EnvFilter::from_default_env()) + .init(); + + std::thread::spawn(|| { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap() + .block_on(async move { + let system = TaskSystem::::new(); + + let (tasks, signalers) = (0..50) + .map(|_| WaitSignalTask::new()) + .unzip::<_, _, Vec<_>, Vec<_>>(); + + info!(total_tasks = %tasks.len()); + + let handles = system.dispatch_many(tasks).await; + + info!("all tasks dispatched"); + + for i in 1..=20 { + handles + .iter() + .map(TaskHandle::pause) + .collect::>() + .join() + .await; + + info!(%i, "all tasks paused"); + + handles + .iter() + .map(TaskHandle::resume) + .collect::>() + .join() + .await; + + info!(%i, "all tasks resumed"); + } + + signalers + .into_iter() + .enumerate() + .map(|(task_idx, signal_tx)| async move { + signal_tx.send(()).await.unwrap_or_else(|e| { + panic!("failed to send signal for task {task_idx}: {e:#?}") + }) + }) + .collect::>() + .join() + .await; + + info!("all tasks signaled for completion"); + + assert!(handles + .join() + .await + .into_iter() + .all(|res| matches!(res, Ok(TaskStatus::Done((_task_id, TaskOutput::Empty)))))); + + info!("all tasks done"); + + system.shutdown().await; + }) + }) + .join() + .unwrap(); +} + #[tokio::test] #[traced_test] async fn jobs_test() { diff --git a/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx b/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx index 69e7ea1782dd..b68449c8ec34 100644 --- a/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx +++ b/interface/app/$libraryId/Layout/Sidebar/JobManager/JobGroup.tsx @@ -201,7 +201,11 @@ function Options({ {(group.status === 'Queued' 
|| group.status === 'Paused' || isJobPaused) && (