From 4a3a6d3f1f8d45e227d7a521ae13d27e85033ea5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 7 Feb 2025 22:06:13 +0100 Subject: [PATCH 1/4] Hardlink resident layers during detach ancestor --- pageserver/src/tenant/storage_layer/layer.rs | 2 -- .../src/tenant/timeline/detach_ancestor.rs | 35 +++++++++++++------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 92313afba7b7..40282defd418 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -353,7 +353,6 @@ impl Layer { /// while the guard exists. /// /// Returns None if the layer is currently evicted or becoming evicted. - #[cfg(test)] pub(crate) async fn keep_resident(&self) -> Option { let downloaded = self.0.inner.get().and_then(|rowe| rowe.get())?; @@ -530,7 +529,6 @@ impl ResidentOrWantedEvicted { /// This is not used on the read path (anything that calls /// [`LayerInner::get_or_maybe_download`]) because it was decided that reads always win /// evictions, and part of that winning is using [`ResidentOrWantedEvicted::get_and_upgrade`]. - #[cfg(test)] fn get(&self) -> Option> { match self { ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()), diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index f8bc4352e220..143ee7089261 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -6,7 +6,9 @@ use crate::{ task_mgr::TaskKind, tenant::{ remote_timeline_client::index::GcBlockingReason::DetachAncestor, - storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer}, + storage_layer::{ + layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer, + }, Tenant, }, virtual_file::{MaybeFatalIo, VirtualFile}, @@ -629,8 +631,8 @@ async fn copy_lsn_prefix( } } -/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote -/// storage on successful return without the adopted layer being added to `index_part.json`. +/// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote +/// storage on successful return. without the adopted layer being added to `index_part.json`. async fn remote_copy( adopted: &Layer, adoptee: &Arc, @@ -638,19 +640,30 @@ async fn remote_copy( shard_identity: ShardIdentity, cancel: &CancellationToken, ) -> Result { - // depending if Layer::keep_resident we could hardlink - let mut metadata = adopted.metadata(); debug_assert!(metadata.generation <= generation); metadata.generation = generation; metadata.shard = shard_identity.shard_index(); - let owned = crate::tenant::storage_layer::Layer::for_evicted( - adoptee.conf, - adoptee, - adopted.layer_desc().layer_name(), - metadata, - ); + let conf = adoptee.conf; + let file_name = adopted.layer_desc().layer_name(); + + // depending if Layer::keep_resident, do a hardlink + let owned = if let Some(adopted_resident) = adopted.keep_resident().await { + let adopted_path = adopted_resident.local_path(); + let adoptee_path = local_layer_path( + conf, + &adoptee.tenant_shard_id, + &adoptee.timeline_id, + &file_name, + &metadata.generation, + ); + std::fs::hard_link(adopted_path, &adoptee_path) + .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard() + } else { + Layer::for_evicted(conf, adoptee, file_name, metadata) + }; adoptee .remote_client From 42f38bb87ca1ab4838cfd812aa11877b0c55991a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 11 Feb 2025 12:38:39 +0100 Subject: [PATCH 2/4] Fsync the dir after hardlink --- .../src/tenant/timeline/detach_ancestor.rs | 55 ++++++++++++++----- 1 file changed, 40 insertions(+), 15 deletions(-) diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index 143ee7089261..eb0fc7a43cd6 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -14,6 +14,7 @@ use crate::{ virtual_file::{MaybeFatalIo, VirtualFile}, }; use anyhow::Context; +use camino::Utf8Path; use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity}; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; @@ -353,18 +354,13 @@ pub(super) async fn prepare( // FIXME: the fsync should be mandatory, after both rewrites and copies if wrote_any { - let timeline_dir = VirtualFile::open( + fsync_timeline_dir( &detached .conf .timeline_path(&detached.tenant_shard_id, &detached.timeline_id), ctx, ) - .await - .fatal_err("VirtualFile::open for timeline dir fsync"); - timeline_dir - .sync_all() - .await - .fatal_err("VirtualFile::sync_all timeline dir"); + .await; } } @@ -378,7 +374,7 @@ pub(super) async fn prepare( tasks.spawn( async move { let _permit = limiter.acquire().await; - let owned = remote_copy( + let (owned, did_hardlink) = remote_copy( &adopted, &timeline, timeline.generation, @@ -386,16 +382,20 @@ pub(super) async fn prepare( &timeline.cancel, ) .await?; - tracing::info!(layer=%owned, "remote copied"); - Ok(owned) + tracing::info!(layer=%owned, did_hard_link=%did_hardlink, "remote copied"); + Ok((owned, did_hardlink)) } .in_current_span(), ); } + let mut should_fsync = false; while let Some(res) = tasks.join_next().await { match res { - Ok(Ok(owned)) => { + Ok(Ok((owned, did_hardlink))) => { + if did_hardlink { + should_fsync = true; + } new_layers.push(owned); } Ok(Err(failed)) => { @@ -405,7 +405,16 @@ pub(super) async fn prepare( } } - // TODO: fsync directory again if we hardlinked something + // fsync directory again if we hardlinked something + if should_fsync { + fsync_timeline_dir( + &detached + .conf + .timeline_path(&detached.tenant_shard_id, &detached.timeline_id), + ctx, + ) + .await; + } let prepared = PreparedTimelineDetach { layers: new_layers }; @@ -633,13 +642,14 @@ async fn copy_lsn_prefix( /// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote /// storage on successful return. without the adopted layer being added to `index_part.json`. +/// Returns (Layer, did hardlink) async fn remote_copy( adopted: &Layer, adoptee: &Arc, generation: Generation, shard_identity: ShardIdentity, cancel: &CancellationToken, -) -> Result { +) -> Result<(Layer, bool), Error> { let mut metadata = adopted.metadata(); debug_assert!(metadata.generation <= generation); metadata.generation = generation; @@ -649,6 +659,7 @@ async fn remote_copy( let file_name = adopted.layer_desc().layer_name(); // depending if Layer::keep_resident, do a hardlink + let did_hardlink; let owned = if let Some(adopted_resident) = adopted.keep_resident().await { let adopted_path = adopted_resident.local_path(); let adoptee_path = local_layer_path( @@ -660,17 +671,21 @@ async fn remote_copy( ); std::fs::hard_link(adopted_path, &adoptee_path) .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + did_hardlink = true; Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard() } else { + did_hardlink = false; Layer::for_evicted(conf, adoptee, file_name, metadata) }; - adoptee + let layer = adoptee .remote_client .copy_timeline_layer(adopted, &owned, cancel) .await .map(move |()| owned) - .map_err(|e| Error::launder(e, Error::Prepare)) + .map_err(|e| Error::launder(e, Error::Prepare))?; + + Ok((layer, did_hardlink)) } pub(crate) enum DetachingAndReparenting { @@ -1014,3 +1029,13 @@ fn check_no_archived_children_of_ancestor( } Ok(()) } + +async fn fsync_timeline_dir(path: impl AsRef, ctx: &RequestContext) { + let timeline_dir = VirtualFile::open(path, ctx) + .await + .fatal_err("VirtualFile::open for timeline dir fsync"); + timeline_dir + .sync_all() + .await + .fatal_err("VirtualFile::sync_all timeline dir"); +} From 1d7a88353637d124f1f5d0c3c5a561c9efe274db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 11 Feb 2025 15:46:29 +0100 Subject: [PATCH 3/4] Pass timeline as an arg --- .../src/tenant/timeline/detach_ancestor.rs | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index eb0fc7a43cd6..81c9723805c2 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -14,7 +14,6 @@ use crate::{ virtual_file::{MaybeFatalIo, VirtualFile}, }; use anyhow::Context; -use camino::Utf8Path; use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity}; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; @@ -354,13 +353,7 @@ pub(super) async fn prepare( // FIXME: the fsync should be mandatory, after both rewrites and copies if wrote_any { - fsync_timeline_dir( - &detached - .conf - .timeline_path(&detached.tenant_shard_id, &detached.timeline_id), - ctx, - ) - .await; + fsync_timeline_dir(&detached, ctx).await; } } @@ -407,13 +400,7 @@ pub(super) async fn prepare( // fsync directory again if we hardlinked something if should_fsync { - fsync_timeline_dir( - &detached - .conf - .timeline_path(&detached.tenant_shard_id, &detached.timeline_id), - ctx, - ) - .await; + fsync_timeline_dir(&detached, ctx).await; } let prepared = PreparedTimelineDetach { layers: new_layers }; @@ -1030,8 +1017,11 @@ fn check_no_archived_children_of_ancestor( Ok(()) } -async fn fsync_timeline_dir(path: impl AsRef, ctx: &RequestContext) { - let timeline_dir = VirtualFile::open(path, ctx) +async fn fsync_timeline_dir(timeline: &Timeline, ctx: &RequestContext) { + let path = &timeline + .conf + .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id); + let timeline_dir = VirtualFile::open(&path, ctx) .await .fatal_err("VirtualFile::open for timeline dir fsync"); timeline_dir From 18456d396fcc1e4e964310b81385eb212eaae7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 11 Feb 2025 15:53:33 +0100 Subject: [PATCH 4/4] clippy --- pageserver/src/tenant/timeline/detach_ancestor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index 81c9723805c2..b6347d12196f 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -353,7 +353,7 @@ pub(super) async fn prepare( // FIXME: the fsync should be mandatory, after both rewrites and copies if wrote_any { - fsync_timeline_dir(&detached, ctx).await; + fsync_timeline_dir(detached, ctx).await; } } @@ -400,7 +400,7 @@ pub(super) async fn prepare( // fsync directory again if we hardlinked something if should_fsync { - fsync_timeline_dir(&detached, ctx).await; + fsync_timeline_dir(detached, ctx).await; } let prepared = PreparedTimelineDetach { layers: new_layers };