Skip to content

Commit

Permalink
Hardlink resident layers during detach ancestor (#10729)
Browse files Browse the repository at this point in the history
After a detach ancestor operation, we don't want to on-demand download
layers that are already resident. This has shown to impede performance,
sometimes quite a lot (50 seconds:
#8828 (comment))

Fixes #8828.
  • Loading branch information
arpad-m authored Feb 11, 2025
1 parent be447ba commit f7b2293
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 33 deletions.
2 changes: 0 additions & 2 deletions pageserver/src/tenant/storage_layer/layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ impl Layer {
/// while the guard exists.
///
/// Returns None if the layer is currently evicted or becoming evicted.
#[cfg(test)]
pub(crate) async fn keep_resident(&self) -> Option<ResidentLayer> {
let downloaded = self.0.inner.get().and_then(|rowe| rowe.get())?;

Expand Down Expand Up @@ -530,7 +529,6 @@ impl ResidentOrWantedEvicted {
/// This is not used on the read path (anything that calls
/// [`LayerInner::get_or_maybe_download`]) because it was decided that reads always win
/// evictions, and part of that winning is using [`ResidentOrWantedEvicted::get_and_upgrade`].
#[cfg(test)]
fn get(&self) -> Option<Arc<DownloadedLayer>> {
match self {
ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()),
Expand Down
90 changes: 59 additions & 31 deletions pageserver/src/tenant/timeline/detach_ancestor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use crate::{
task_mgr::TaskKind,
tenant::{
remote_timeline_client::index::GcBlockingReason::DetachAncestor,
storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer},
storage_layer::{
layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer,
},
Tenant,
},
virtual_file::{MaybeFatalIo, VirtualFile},
Expand Down Expand Up @@ -351,18 +353,7 @@ pub(super) async fn prepare(

// FIXME: the fsync should be mandatory, after both rewrites and copies
if wrote_any {
let timeline_dir = VirtualFile::open(
&detached
.conf
.timeline_path(&detached.tenant_shard_id, &detached.timeline_id),
ctx,
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
timeline_dir
.sync_all()
.await
.fatal_err("VirtualFile::sync_all timeline dir");
fsync_timeline_dir(detached, ctx).await;
}
}

Expand All @@ -376,24 +367,28 @@ pub(super) async fn prepare(
tasks.spawn(
async move {
let _permit = limiter.acquire().await;
let owned = remote_copy(
let (owned, did_hardlink) = remote_copy(
&adopted,
&timeline,
timeline.generation,
timeline.shard_identity,
&timeline.cancel,
)
.await?;
tracing::info!(layer=%owned, "remote copied");
Ok(owned)
tracing::info!(layer=%owned, did_hard_link=%did_hardlink, "remote copied");
Ok((owned, did_hardlink))
}
.in_current_span(),
);
}

let mut should_fsync = false;
while let Some(res) = tasks.join_next().await {
match res {
Ok(Ok(owned)) => {
Ok(Ok((owned, did_hardlink))) => {
if did_hardlink {
should_fsync = true;
}
new_layers.push(owned);
}
Ok(Err(failed)) => {
Expand All @@ -403,7 +398,10 @@ pub(super) async fn prepare(
}
}

// TODO: fsync directory again if we hardlinked something
// fsync directory again if we hardlinked something
if should_fsync {
fsync_timeline_dir(detached, ctx).await;
}

let prepared = PreparedTimelineDetach { layers: new_layers };

Expand Down Expand Up @@ -629,35 +627,52 @@ async fn copy_lsn_prefix(
}
}

/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote
/// storage on successful return without the adopted layer being added to `index_part.json`.
/// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote
/// storage on successful return. without the adopted layer being added to `index_part.json`.
/// Returns (Layer, did hardlink)
async fn remote_copy(
adopted: &Layer,
adoptee: &Arc<Timeline>,
generation: Generation,
shard_identity: ShardIdentity,
cancel: &CancellationToken,
) -> Result<Layer, Error> {
// depending if Layer::keep_resident we could hardlink

) -> Result<(Layer, bool), Error> {
let mut metadata = adopted.metadata();
debug_assert!(metadata.generation <= generation);
metadata.generation = generation;
metadata.shard = shard_identity.shard_index();

let owned = crate::tenant::storage_layer::Layer::for_evicted(
adoptee.conf,
adoptee,
adopted.layer_desc().layer_name(),
metadata,
);
let conf = adoptee.conf;
let file_name = adopted.layer_desc().layer_name();

// depending if Layer::keep_resident, do a hardlink
let did_hardlink;
let owned = if let Some(adopted_resident) = adopted.keep_resident().await {
let adopted_path = adopted_resident.local_path();
let adoptee_path = local_layer_path(
conf,
&adoptee.tenant_shard_id,
&adoptee.timeline_id,
&file_name,
&metadata.generation,
);
std::fs::hard_link(adopted_path, &adoptee_path)
.map_err(|e| Error::launder(e.into(), Error::Prepare))?;
did_hardlink = true;
Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard()
} else {
did_hardlink = false;
Layer::for_evicted(conf, adoptee, file_name, metadata)
};

adoptee
let layer = adoptee
.remote_client
.copy_timeline_layer(adopted, &owned, cancel)
.await
.map(move |()| owned)
.map_err(|e| Error::launder(e, Error::Prepare))
.map_err(|e| Error::launder(e, Error::Prepare))?;

Ok((layer, did_hardlink))
}

pub(crate) enum DetachingAndReparenting {
Expand Down Expand Up @@ -1001,3 +1016,16 @@ fn check_no_archived_children_of_ancestor(
}
Ok(())
}

async fn fsync_timeline_dir(timeline: &Timeline, ctx: &RequestContext) {
let path = &timeline
.conf
.timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id);
let timeline_dir = VirtualFile::open(&path, ctx)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
timeline_dir
.sync_all()
.await
.fatal_err("VirtualFile::sync_all timeline dir");
}

1 comment on commit f7b2293

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

7576 tests run: 7184 passed, 0 failed, 392 skipped (full report)


Flaky tests (4)

Postgres 15

Postgres 14

Code coverage* (full report)

  • functions: 33.2% (8582 of 25850 functions)
  • lines: 49.1% (72283 of 147327 lines)

* collected from Rust tests only


The comment gets automatically updated with the latest test results
f7b2293 at 2025-02-11T19:36:19.829Z :recycle:

Please sign in to comment.