diff --git a/Cargo.lock b/Cargo.lock index 3a1f26b9ba9..aefffc9128f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6114,6 +6114,7 @@ dependencies = [ "diesel", "expectorate", "hex", + "iddqd", "ipnetwork", "macaddr", "newtype_derive", @@ -6295,6 +6296,7 @@ version = "0.1.0" dependencies = [ "anyhow", "base64 0.22.1", + "camino", "chrono", "clickhouse-admin-keeper-client", "clickhouse-admin-server-client", @@ -6306,6 +6308,7 @@ dependencies = [ "gateway-test-utils", "gateway-types", "id-map", + "iddqd", "nexus-sled-agent-shared", "nexus-types", "omicron-common", @@ -6316,10 +6319,13 @@ dependencies = [ "reqwest", "serde_json", "sled-agent-client", + "sled-agent-types", + "sled-agent-zone-images-examples", "slog", "strum", "thiserror 1.0.69", "tokio", + "tufaceous-artifact", "typed-rng", "uuid", ] @@ -6652,6 +6658,7 @@ dependencies = [ name = "nexus-sled-agent-shared" version = "0.1.0" dependencies = [ + "camino", "chrono", "daft", "id-map", diff --git a/common/src/update/zone_manifest.rs b/common/src/update/zone_manifest.rs index f48e9717cd7..74d24ede76f 100644 --- a/common/src/update/zone_manifest.rs +++ b/common/src/update/zone_manifest.rs @@ -6,11 +6,12 @@ use std::fmt; use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; use omicron_uuid_kinds::MupdateUuid; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use tufaceous_artifact::ArtifactHash; /// Describes the set of Omicron zones written out into an install dataset. -#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema)] pub struct OmicronZoneManifest { /// The source of the manifest. pub source: OmicronZoneManifestSource, @@ -25,7 +26,10 @@ impl OmicronZoneManifest { } /// The source of truth for an Omicron zone manifest. 
-#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)] +#[derive( + Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize, JsonSchema, +)] +#[serde(tag = "source", rename_all = "snake_case")] pub enum OmicronZoneManifestSource { /// The manifest was written out by installinator and the mupdate process. Installinator { @@ -56,7 +60,15 @@ impl fmt::Display for OmicronZoneManifestSource { /// /// Part of [`OmicronZoneManifest`]. #[derive( - Clone, Debug, Eq, Ord, PartialEq, PartialOrd, Deserialize, Serialize, + Clone, + Debug, + Eq, + Ord, + PartialEq, + PartialOrd, + Deserialize, + Serialize, + JsonSchema, )] pub struct OmicronZoneFileMetadata { /// The file name. diff --git a/nexus-sled-agent-shared/Cargo.toml b/nexus-sled-agent-shared/Cargo.toml index 6bfd37c2934..9a9d3bf9c75 100644 --- a/nexus-sled-agent-shared/Cargo.toml +++ b/nexus-sled-agent-shared/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" workspace = true [dependencies] +camino.workspace = true chrono.workspace = true daft.workspace = true id-map.workspace = true diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index c6d7b1eabc1..aa48f8d2b18 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -8,6 +8,7 @@ use std::collections::BTreeMap; use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::time::Duration; +use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use daft::Diffable; use id_map::IdMap; @@ -17,19 +18,24 @@ use iddqd::IdOrdMap; use iddqd::id_upcast; use omicron_common::disk::{DatasetKind, DatasetName}; use omicron_common::ledger::Ledgerable; +use omicron_common::update::OmicronZoneManifestSource; use omicron_common::{ api::{ external::{ByteCount, Generation}, internal::shared::{NetworkInterface, SourceNatConfig}, }, disk::{DatasetConfig, DiskVariant, OmicronPhysicalDiskConfig}, + snake_case_result::{self, SnakeCaseResult}, update::ArtifactId, zpool_name::ZpoolName, }; -use 
omicron_uuid_kinds::{DatasetUuid, OmicronZoneUuid}; +use omicron_uuid_kinds::{ + DatasetUuid, InternalZpoolUuid, MupdateUuid, OmicronZoneUuid, +}; use omicron_uuid_kinds::{MupdateOverrideUuid, PhysicalDiskUuid}; use omicron_uuid_kinds::{SledUuid, ZpoolUuid}; -use schemars::JsonSchema; +use schemars::schema::{Schema, SchemaObject}; +use schemars::{JsonSchema, SchemaGenerator}; use serde::{Deserialize, Serialize}; // Export this type for convenience -- this way, dependents don't have to // depend on sled-hardware-types. @@ -119,6 +125,7 @@ pub struct Inventory { pub ledgered_sled_config: Option, pub reconciler_status: ConfigReconcilerInventoryStatus, pub last_reconciliation: Option, + pub zone_image_resolver: ZoneImageResolverInventory, } /// Describes the last attempt made by the sled-agent-config-reconciler to @@ -257,6 +264,191 @@ pub enum ConfigReconcilerInventoryStatus { Idle { completed_at: DateTime, ran_for: Duration }, } +/// A simplified form of zone image resolver status. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct ZoneImageResolverInventory { + /// The zone manifest status. + pub zone_manifest: ZoneManifestInventory, + + /// The mupdate override status. + pub mupdate_override: MupdateOverrideInventory, +} + +impl ZoneImageResolverInventory { + /// Returns a new, fake inventory for tests. + pub fn new_fake() -> Self { + Self { + zone_manifest: ZoneManifestInventory::new_fake(), + mupdate_override: MupdateOverrideInventory::new_fake(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct ZoneManifestInventory { + /// The path to the zone manifest file on the boot disk. + #[schemars(schema_with = "path_schema")] + pub boot_disk_path: Utf8PathBuf, + + /// The manifest read from disk. + #[serde(with = "snake_case_result")] + #[schemars( + schema_with = "SnakeCaseResult::::json_schema" + )] + pub manifest: Result, + + /// Warnings about non-boot disks, if any. 
+ pub non_boot_status: IdOrdMap, +} + +impl ZoneManifestInventory { + /// Returns a new, fake inventory for tests. + pub fn new_fake() -> Self { + Self { + boot_disk_path: Utf8PathBuf::from("/fake/path/install/zones.json"), + manifest: Ok(ZoneArtifactsInventory::new_fake()), + non_boot_status: IdOrdMap::new(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct ZoneArtifactsInventory { + /// The manifest source. + pub source: OmicronZoneManifestSource, + + /// The artifacts on disk. + pub artifacts: IdOrdMap, +} + +impl ZoneArtifactsInventory { + /// Returns a new, fake inventory for tests. + pub fn new_fake() -> Self { + Self { + source: OmicronZoneManifestSource::Installinator { + mupdate_id: MupdateUuid::nil(), + }, + // TODO: fill out some fake zones here? maybe a representative + // selection of real zones? + artifacts: IdOrdMap::new(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct ZoneArtifactInventory { + /// The filename. + pub file_name: String, + + /// The full path to the file. + #[schemars(schema_with = "path_schema")] + pub path: Utf8PathBuf, + + /// The expected size of the file. + pub expected_size: u64, + + /// The expected hash of the file. + pub expected_hash: ArtifactHash, + + /// The status. + #[serde(with = "snake_case_result")] + #[schemars(schema_with = "SnakeCaseResult::<(), String>::json_schema")] + pub status: Result<(), String>, +} + +impl IdOrdItem for ZoneArtifactInventory { + type Key<'a> = &'a str; + fn key(&self) -> Self::Key<'_> { + &self.file_name + } + id_upcast!(); +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct ZoneManifestNonBootInventory { + /// The non-boot zpool ID. + pub zpool_id: InternalZpoolUuid, + + /// The path to the zone manifest JSON on the non-boot disk. + #[schemars(schema_with = "path_schema")] + pub path: Utf8PathBuf, + + /// Whether the status is valid. 
+ pub is_valid: bool, + + /// A message describing the status. If `is_valid` is false, then this + /// message describes the reason for the invalid status. + pub message: String, +} + +impl IdOrdItem for ZoneManifestNonBootInventory { + type Key<'a> = InternalZpoolUuid; + fn key(&self) -> Self::Key<'_> { + self.zpool_id + } + id_upcast!(); +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct MupdateOverrideInventory { + /// The path to the mupdate override JSON on the boot disk. + #[schemars(schema_with = "path_schema")] + pub boot_disk_path: Utf8PathBuf, + + /// The boot disk override, or an error if it could not be parsed. + #[serde(with = "snake_case_result")] + #[schemars(schema_with = "SnakeCaseResult::<(), String>::json_schema")] + pub boot_disk_override: + Result, String>, + + /// Warnings about non-boot disks, if any. + pub non_boot_status: IdOrdMap, +} + +impl MupdateOverrideInventory { + /// Returns a new, fake inventory for tests. + pub fn new_fake() -> Self { + Self { + boot_disk_path: Utf8PathBuf::from( + "/fake/path/install/mupdate_override.json", + ), + boot_disk_override: Ok(None), + non_boot_status: IdOrdMap::new(), + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct MupdateOverrideInfoInventory { + /// The mupdate override UUID. + pub mupdate_override_id: MupdateOverrideUuid, +} + +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, JsonSchema, Serialize)] +pub struct MupdateOverrideNonBootInventory { + /// The non-boot zpool ID. + pub zpool_id: InternalZpoolUuid, + + /// The path to the mupdate override JSON on the non-boot disk. + #[schemars(schema_with = "path_schema")] + pub path: Utf8PathBuf, + + /// Whether the status is valid. + pub is_valid: bool, + + /// A message describing the status. If `is_valid` is false, then this + /// message describes the reason for the invalid status. 
+ pub message: String, +} + +impl IdOrdItem for MupdateOverrideNonBootInventory { + type Key<'a> = InternalZpoolUuid; + fn key(&self) -> Self::Key<'_> { + self.zpool_id + } + id_upcast!(); +} + /// Describes the role of the sled within the rack. /// /// Note that this may change if the sled is physically moved @@ -941,3 +1133,11 @@ mod tests { } } } + +// Used for schemars to be able to be used with camino: +// See https://github.com/camino-rs/camino/issues/91#issuecomment-2027908513 +fn path_schema(generator: &mut SchemaGenerator) -> Schema { + let mut schema: SchemaObject = ::json_schema(generator).into(); + schema.format = Some("Utf8PathBuf".to_owned()); + schema.into() +} diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 62c42713b37..fdea9044648 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -18,6 +18,7 @@ clickhouse-admin-types.workspace = true derive-where.workspace = true diesel = { workspace = true, features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } hex.workspace = true +iddqd.workspace = true ipnetwork.workspace = true macaddr.workspace = true newtype_derive.workspace = true diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 6461af1e2cf..599cc9eb0ac 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -23,22 +23,34 @@ use diesel::expression::AsExpression; use diesel::pg::Pg; use diesel::serialize::ToSql; use diesel::{serialize, sql_types}; +use iddqd::IdOrdMap; use ipnetwork::IpNetwork; +use nexus_db_schema::schema::inv_zone_manifest_non_boot; +use nexus_db_schema::schema::inv_zone_manifest_zone; use nexus_db_schema::schema::{ hw_baseboard_id, inv_caboose, inv_clickhouse_keeper_membership, inv_collection, inv_collection_error, inv_dataset, inv_last_reconciliation_dataset_result, inv_last_reconciliation_disk_result, inv_last_reconciliation_orphaned_dataset, - inv_last_reconciliation_zone_result, 
inv_nvme_disk_firmware, - inv_omicron_sled_config, inv_omicron_sled_config_dataset, - inv_omicron_sled_config_disk, inv_omicron_sled_config_zone, - inv_omicron_sled_config_zone_nic, inv_physical_disk, inv_root_of_trust, - inv_root_of_trust_page, inv_service_processor, inv_sled_agent, inv_zpool, - sw_caboose, sw_root_of_trust_page, + inv_last_reconciliation_zone_result, inv_mupdate_override_non_boot, + inv_nvme_disk_firmware, inv_omicron_sled_config, + inv_omicron_sled_config_dataset, inv_omicron_sled_config_disk, + inv_omicron_sled_config_zone, inv_omicron_sled_config_zone_nic, + inv_physical_disk, inv_root_of_trust, inv_root_of_trust_page, + inv_service_processor, inv_sled_agent, inv_zpool, sw_caboose, + sw_root_of_trust_page, }; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use nexus_sled_agent_shared::inventory::MupdateOverrideInfoInventory; +use nexus_sled_agent_shared::inventory::MupdateOverrideInventory; +use nexus_sled_agent_shared::inventory::MupdateOverrideNonBootInventory; use nexus_sled_agent_shared::inventory::OrphanedDataset; +use nexus_sled_agent_shared::inventory::ZoneArtifactInventory; +use nexus_sled_agent_shared::inventory::ZoneArtifactsInventory; +use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; +use nexus_sled_agent_shared::inventory::ZoneManifestInventory; +use nexus_sled_agent_shared::inventory::ZoneManifestNonBootInventory; use nexus_sled_agent_shared::inventory::{ ConfigReconcilerInventoryResult, OmicronSledConfig, OmicronZoneConfig, OmicronZoneDataset, OmicronZoneImageSource, OmicronZoneType, @@ -53,10 +65,13 @@ use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetName; use omicron_common::disk::DiskIdentity; use omicron_common::disk::OmicronPhysicalDiskConfig; +use omicron_common::update::OmicronZoneManifestSource; use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::DatasetKind; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; +use 
omicron_uuid_kinds::InternalZpoolKind; +use omicron_uuid_kinds::MupdateKind; use omicron_uuid_kinds::MupdateOverrideKind; use omicron_uuid_kinds::MupdateOverrideUuid; use omicron_uuid_kinds::OmicronSledConfigKind; @@ -824,6 +839,9 @@ pub struct InvSledAgent { #[diesel(embed)] pub reconciler_status: InvConfigReconcilerStatus, + + #[diesel(embed)] + pub zone_image_resolver: InvZoneImageResolver, } /// See [`nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus`]. @@ -927,6 +945,7 @@ impl InvSledAgent { ledgered_sled_config: Option, last_reconciliation_sled_config: Option, reconciler_status: InvConfigReconcilerStatus, + zone_image_resolver: InvZoneImageResolver, ) -> Result { // It's irritating to have to check this case at runtime. The challenge // is that if this sled agent does have a baseboard id, we don't know @@ -968,6 +987,7 @@ impl InvSledAgent { last_reconciliation_sled_config: last_reconciliation_sled_config.map(From::from), reconciler_status, + zone_image_resolver, }) } } @@ -1154,6 +1174,280 @@ impl From for ConfigReconcilerInventoryResult { } } +// See [`omicron_common::update::OmicronZoneManifestSource`]. +impl_enum_type!( + InvZoneManifestSourceEnum: + + #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, PartialEq)] + pub enum InvZoneManifestSourceEnum; + + // Enum values + Installinator => b"installinator" + SledAgent => b"sled-agent" +); + +/// Rows corresponding to the zone image resolver in `inv_sled_agent`. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_sled_agent)] +pub struct InvZoneImageResolver { + pub zone_manifest_boot_disk_path: String, + pub zone_manifest_source: Option, + pub zone_manifest_mupdate_id: Option>, + pub zone_manifest_boot_disk_error: Option, + + pub mupdate_override_boot_disk_path: String, + pub mupdate_override_id: Option>, + pub mupdate_override_boot_disk_error: Option, +} + +impl InvZoneImageResolver { + /// Construct a new `InvZoneImageResolver`. 
+    pub fn new(inv: &ZoneImageResolverInventory) -> Self {
+        let zone_manifest_boot_disk_path =
+            inv.zone_manifest.boot_disk_path.clone().into();
+        let (
+            zone_manifest_source,
+            zone_manifest_mupdate_id,
+            zone_manifest_boot_disk_error,
+        ) = match &inv.zone_manifest.manifest {
+            Ok(manifest) => match manifest.source {
+                OmicronZoneManifestSource::Installinator { mupdate_id } => (
+                    Some(InvZoneManifestSourceEnum::Installinator),
+                    Some(mupdate_id.into()),
+                    None,
+                ),
+                OmicronZoneManifestSource::SledAgent => {
+                    (Some(InvZoneManifestSourceEnum::SledAgent), None, None)
+                }
+            },
+            Err(error) => (None, None, Some(error.to_string())),
+        };
+
+        let mupdate_override_boot_disk_path =
+            inv.mupdate_override.boot_disk_path.clone().into();
+        let mupdate_override_id = inv
+            .mupdate_override
+            .boot_disk_override
+            .as_ref()
+            .ok()
+            .cloned()
+            .flatten()
+            .map(|inv| inv.mupdate_override_id.into());
+        let mupdate_override_boot_disk_error =
+            inv.mupdate_override.boot_disk_override.as_ref().err().cloned();
+
+        Self {
+            zone_manifest_boot_disk_path,
+            zone_manifest_source,
+            zone_manifest_mupdate_id,
+            zone_manifest_boot_disk_error,
+            mupdate_override_boot_disk_path,
+            mupdate_override_id,
+            mupdate_override_boot_disk_error,
+        }
+    }
+
+    /// Convert self into the inventory type (panics on schema-violating rows).
+    pub fn into_inventory(
+        self,
+        artifacts: Option<IdOrdMap<ZoneArtifactInventory>>,
+        zone_manifest_non_boot: Option<IdOrdMap<ZoneManifestNonBootInventory>>,
+        mupdate_override_non_boot: Option<
+            IdOrdMap<MupdateOverrideNonBootInventory>,
+        >,
+    ) -> ZoneImageResolverInventory {
+        // Build up the ZoneManifestInventory; a recorded error takes priority.
+        let manifest = if let Some(error) = self.zone_manifest_boot_disk_error {
+            Err(error)
+        } else {
+            let source = match self.zone_manifest_source {
+                Some(InvZoneManifestSourceEnum::Installinator) => {
+                    OmicronZoneManifestSource::Installinator {
+                        mupdate_id: self
+                            .zone_manifest_mupdate_id
+                            .expect(
+                                "if the source is Installinator, then the \
+                                 db schema guarantees that mupdate_id is Some",
+                            )
+                            .into(),
+                    }
+                }
+                Some(InvZoneManifestSourceEnum::SledAgent) => {
+                    OmicronZoneManifestSource::SledAgent
+                }
+                None => {
+                    unreachable!(
+                        "if the source is None, then the db schema guarantees \
+                         that there was an error"
+                    )
+                }
+            };
+
+            Ok(ZoneArtifactsInventory {
+                source,
+                // Artifacts might really be None in case no zones were found.
+                // (This is unusual but permitted by the data model, so any
+                // checks around this should happen at a higher level.)
+                artifacts: artifacts.unwrap_or_default(),
+            })
+        };
+
+        let zone_manifest = ZoneManifestInventory {
+            boot_disk_path: self.zone_manifest_boot_disk_path.into(),
+            manifest,
+            // This might be None if no non-boot disks were found.
+            non_boot_status: zone_manifest_non_boot.unwrap_or_default(),
+        };
+
+        // Build up the mupdate override struct; a recorded error takes priority.
+        let boot_disk_override = if let Some(error) =
+            self.mupdate_override_boot_disk_error
+        {
+            Err(error)
+        } else {
+            let info = self.mupdate_override_id.map(|id| {
+                MupdateOverrideInfoInventory { mupdate_override_id: id.into() }
+            });
+            Ok(info)
+        };
+
+        let mupdate_override = MupdateOverrideInventory {
+            boot_disk_path: self.mupdate_override_boot_disk_path.into(),
+            boot_disk_override,
+            // This might be None if no non-boot disks were found.
+            non_boot_status: mupdate_override_non_boot.unwrap_or_default(),
+        };
+
+        ZoneImageResolverInventory { zone_manifest, mupdate_override }
+    }
+}
+
+/// Represents a zone file entry from the zone manifest on a sled.
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_zone_manifest_zone)] +pub struct InvZoneManifestZone { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub zone_file_name: String, + pub path: String, + pub expected_size: i64, + pub expected_sha256: ArtifactHash, + pub error: Option, +} + +impl InvZoneManifestZone { + pub fn new( + collection_id: CollectionUuid, + sled_id: SledUuid, + artifact: &ZoneArtifactInventory, + ) -> Self { + Self { + inv_collection_id: collection_id.into(), + sled_id: sled_id.into(), + zone_file_name: artifact.file_name.clone(), + path: artifact.path.clone().into(), + expected_size: artifact.expected_size as i64, + expected_sha256: artifact.expected_hash.into(), + error: artifact.status.as_ref().err().cloned(), + } + } +} + +impl From for ZoneArtifactInventory { + fn from(row: InvZoneManifestZone) -> Self { + Self { + file_name: row.zone_file_name, + path: row.path.into(), + expected_size: row.expected_size as u64, + expected_hash: row.expected_sha256.into(), + status: match row.error { + None => Ok(()), + Some(error) => Err(error), + }, + } + } +} + +/// Represents a non-boot zpool entry from the zone manifest on a sled. 
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_zone_manifest_non_boot)] +pub struct InvZoneManifestNonBoot { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub non_boot_zpool_id: DbTypedUuid, + pub path: String, + pub is_valid: bool, + pub message: String, +} + +impl InvZoneManifestNonBoot { + pub fn new( + collection_id: CollectionUuid, + sled_id: SledUuid, + non_boot: &ZoneManifestNonBootInventory, + ) -> Self { + Self { + inv_collection_id: collection_id.into(), + sled_id: sled_id.into(), + non_boot_zpool_id: non_boot.zpool_id.into(), + path: non_boot.path.clone().into(), + is_valid: non_boot.is_valid, + message: non_boot.message.clone(), + } + } +} + +impl From for ZoneManifestNonBootInventory { + fn from(row: InvZoneManifestNonBoot) -> Self { + Self { + zpool_id: row.non_boot_zpool_id.into(), + path: row.path.into(), + is_valid: row.is_valid, + message: row.message, + } + } +} + +/// Represents a non-boot zpool entry from the mupdate override on a sled. 
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = inv_mupdate_override_non_boot)] +pub struct InvMupdateOverrideNonBoot { + pub inv_collection_id: DbTypedUuid, + pub sled_id: DbTypedUuid, + pub non_boot_zpool_id: DbTypedUuid, + pub path: String, + pub is_valid: bool, + pub message: String, +} + +impl InvMupdateOverrideNonBoot { + pub fn new( + collection_id: CollectionUuid, + sled_id: SledUuid, + non_boot: &MupdateOverrideNonBootInventory, + ) -> Self { + Self { + inv_collection_id: collection_id.into(), + sled_id: sled_id.into(), + non_boot_zpool_id: non_boot.zpool_id.into(), + path: non_boot.path.clone().into(), + is_valid: non_boot.is_valid, + message: non_boot.message.clone(), + } + } +} + +impl From for MupdateOverrideNonBootInventory { + fn from(row: InvMupdateOverrideNonBoot) -> Self { + Self { + zpool_id: row.non_boot_zpool_id.into(), + path: row.path.into(), + is_valid: row.is_valid, + message: row.message, + } + } +} + /// See [`nexus_types::inventory::PhysicalDisk`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_physical_disk)] diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index bd270fb4c78..330df35a444 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -16,7 +16,7 @@ use std::{collections::BTreeMap, sync::LazyLock}; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: Version = Version::new(150, 0, 0); +pub const SCHEMA_VERSION: Version = Version::new(151, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -28,6 +28,7 @@ static KNOWN_VERSIONS: LazyLock> = LazyLock::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(151, "zone-image-resolver-inventory"), KnownVersion::new(150, "add-last-reconciliation-orphaned-datasets"), KnownVersion::new(149, "bp-add-target-release-min-gen"), KnownVersion::new(148, "clean-misplaced-m2s"), diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 605cc68b5d7..fb34701bba8 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -30,9 +30,6 @@ use iddqd::IdOrdMap; use nexus_db_errors::ErrorHandler; use nexus_db_errors::public_error_from_diesel; use nexus_db_errors::public_error_from_diesel_lookup; -use nexus_db_model::HwBaseboardId; -use nexus_db_model::HwPowerState; -use nexus_db_model::HwRotSlot; use nexus_db_model::InvCaboose; use nexus_db_model::InvClickhouseKeeperMembership; use nexus_db_model::InvCollection; @@ -64,7 +61,11 @@ use nexus_db_model::SqlU32; use nexus_db_model::SwCaboose; use nexus_db_model::SwRotPage; use nexus_db_model::to_db_typed_uuid; -use nexus_db_schema::enums::HwPowerStateEnum; +use nexus_db_model::{ + HwBaseboardId, InvZoneImageResolver, InvZoneManifestZone, +}; +use nexus_db_model::{HwPowerState, InvZoneManifestNonBoot}; +use nexus_db_model::{HwRotSlot, InvMupdateOverrideNonBoot}; use nexus_db_schema::enums::HwRotSlotEnum; use nexus_db_schema::enums::RotImageErrorEnum; use nexus_db_schema::enums::RotPageWhichEnum; @@ -73,11 +74,15 @@ use nexus_db_schema::enums::SpTypeEnum; use nexus_db_schema::enums::{ CabooseWhichEnum, InvConfigReconcilerStatusKindEnum, }; +use nexus_db_schema::enums::{HwPowerStateEnum, InvZoneManifestSourceEnum}; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; +use 
nexus_sled_agent_shared::inventory::MupdateOverrideNonBootInventory; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OrphanedDataset; +use nexus_sled_agent_shared::inventory::ZoneArtifactInventory; +use nexus_sled_agent_shared::inventory::ZoneManifestNonBootInventory; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::Collection; use nexus_types::inventory::PhysicalDiskFirmware; @@ -218,12 +223,76 @@ impl DataStore { }) .collect(); + // Pull zone manifest zones out of all sled agents. + let zone_manifest_zones: Vec<_> = collection + .sled_agents + .iter() + .filter_map(|(sled_id, sled_agent)| { + sled_agent + .zone_image_resolver + .zone_manifest + .manifest + .as_ref() + .ok() + .map(|artifacts| { + artifacts.artifacts.iter().map(|artifact| { + InvZoneManifestZone::new( + collection_id, + *sled_id, + artifact, + ) + }) + }) + }) + .flatten() + .collect(); + + // Pull zone manifest non-boot info out of all sled agents. + let zone_manifest_non_boot: Vec<_> = collection + .sled_agents + .iter() + .flat_map(|(sled_id, sled_agent)| { + sled_agent + .zone_image_resolver + .zone_manifest + .non_boot_status + .iter() + .map(|non_boot| { + InvZoneManifestNonBoot::new( + collection_id, + *sled_id, + non_boot, + ) + }) + }) + .collect(); + + // Pull mupdate override non-boot info out of all sled agents. + let mupdate_override_non_boot: Vec<_> = collection + .sled_agents + .iter() + .flat_map(|(sled_id, sled_agent)| { + sled_agent + .zone_image_resolver + .mupdate_override + .non_boot_status + .iter() + .map(|non_boot| { + InvMupdateOverrideNonBoot::new( + collection_id, + *sled_id, + non_boot, + ) + }) + }) + .collect(); + // Build up a list of `OmicronSledConfig`s we need to insert. 
Each sled // has 0-3: // // * The ledgered sled config (if the sled has gotten a config from RSS // or Nexus) - // * The most-recently-reconciled config (if the sled-agent's config + // * The most-recently-reconciled config (if the sled-agent's config // reconciler has run since the last time it started) // * The currently-being-reconciled config (if the sled-agent's config // reconciler was actively running when inventory was collected) @@ -271,12 +340,15 @@ impl DataStore { } = config_reconciler_fields_by_sled .remove(&sled_agent.sled_id) .expect("all sled IDs should exist"); + let zone_image_resolver = + InvZoneImageResolver::new(&sled_agent.zone_image_resolver); InvSledAgent::new_without_baseboard( collection_id, sled_agent, ledgered_sled_config, last_reconciliation_sled_config, reconciler_status, + zone_image_resolver, ) .map_err(|e| Error::internal_error(&e.to_string())) }) @@ -1051,6 +1123,64 @@ impl DataStore { } } + // Insert rows for all the zones found in the zone manifest on the + // boot disk. + { + use nexus_db_schema::schema::inv_zone_manifest_zone::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut zones = zone_manifest_zones.into_iter(); + loop { + let some_zones = + zones.by_ref().take(batch_size).collect::>(); + if some_zones.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_zone_manifest_zone) + .values(some_zones) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for non-boot zone manifests. + { + use nexus_db_schema::schema::inv_zone_manifest_non_boot::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut non_boot = zone_manifest_non_boot.into_iter(); + loop { + let some_non_boot = + non_boot.by_ref().take(batch_size).collect::>(); + if some_non_boot.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_zone_manifest_non_boot) + .values(some_non_boot) + .execute_async(&conn) + .await?; + } + } + + // Insert rows for non-boot mupdate overrides.
+ { + use nexus_db_schema::schema::inv_mupdate_override_non_boot::dsl; + + let batch_size = SQL_BATCH_SIZE.get().try_into().unwrap(); + let mut non_boot = mupdate_override_non_boot.into_iter(); + loop { + let some_non_boot = + non_boot.by_ref().take(batch_size).collect::>(); + if some_non_boot.is_empty() { + break; + } + let _ = diesel::insert_into(dsl::inv_mupdate_override_non_boot) + .values(some_non_boot) + .execute_async(&conn) + .await?; + } + } + // Insert rows for the sled agents that we found. In practice, we'd // expect these to all have baseboards (if using Oxide hardware) or // none have baseboards (if not). @@ -1072,6 +1202,7 @@ impl DataStore { } = config_reconciler_fields_by_sled .remove(&sled_agent.sled_id) .expect("all sled IDs should exist"); + let zone_image_resolver = InvZoneImageResolver::new(&sled_agent.zone_image_resolver); let selection = nexus_db_schema::schema::hw_baseboard_id::table .select(( db_collection_id @@ -1119,6 +1250,20 @@ impl DataStore { .into_sql::>(), reconciler_status.reconciler_status_duration_secs .into_sql::>(), + zone_image_resolver.zone_manifest_boot_disk_path + .into_sql::(), + zone_image_resolver.zone_manifest_source + .into_sql::>(), + zone_image_resolver.zone_manifest_mupdate_id + .into_sql::>(), + zone_image_resolver.zone_manifest_boot_disk_error + .into_sql::>(), + zone_image_resolver.mupdate_override_boot_disk_path + .into_sql::(), + zone_image_resolver.mupdate_override_id + .into_sql::>(), + zone_image_resolver.mupdate_override_boot_disk_error + .into_sql::>(), )) .filter( baseboard_dsl::part_number @@ -1150,6 +1295,13 @@ impl DataStore { sa_dsl::reconciler_status_sled_config, sa_dsl::reconciler_status_timestamp, sa_dsl::reconciler_status_duration_secs, + sa_dsl::zone_manifest_boot_disk_path, + sa_dsl::zone_manifest_source, + sa_dsl::zone_manifest_mupdate_id, + sa_dsl::zone_manifest_boot_disk_error, + sa_dsl::mupdate_override_boot_disk_path, + sa_dsl::mupdate_override_id, + 
sa_dsl::mupdate_override_boot_disk_error, )) .execute_async(&conn) .await?; @@ -1175,6 +1327,13 @@ impl DataStore { _reconciler_status_sled_config, _reconciler_status_timestamp, _reconciler_status_duration_secs, + _zone_manifest_boot_disk_path, + _zone_manifest_source, + _zone_manifest_mupdate_id, + _zone_manifest_boot_disk_error, + _mupdate_override_boot_disk_path, + _mupdate_override_boot_disk_id, + _mupdate_override_boot_disk_error, ) = sa_dsl::inv_sled_agent::all_columns(); } @@ -1474,6 +1633,9 @@ impl DataStore { nlast_reconciliation_dataset_results: usize, nlast_reconciliation_orphaned_datasets: usize, nlast_reconciliation_zone_results: usize, + nzone_manifest_zones: usize, + nzone_manifest_non_boot: usize, + nmupdate_override_non_boot: usize, nomicron_sled_configs: usize, nomicron_sled_config_disks: usize, nomicron_sled_config_datasets: usize, @@ -1498,6 +1660,9 @@ impl DataStore { nlast_reconciliation_dataset_results, nlast_reconciliation_orphaned_datasets, nlast_reconciliation_zone_results, + nzone_manifest_zones, + nzone_manifest_non_boot, + nmupdate_override_non_boot, nomicron_sled_configs, nomicron_sled_config_disks, nomicron_sled_config_datasets, @@ -1635,6 +1800,32 @@ impl DataStore { .await? }; + // Remove rows associated with zone resolver inventory. + let nzone_manifest_zones = { + use nexus_db_schema::schema::inv_zone_manifest_zone::dsl; + diesel::delete(dsl::inv_zone_manifest_zone.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + let nzone_manifest_non_boot = { + use nexus_db_schema::schema::inv_zone_manifest_non_boot::dsl; + diesel::delete(dsl::inv_zone_manifest_non_boot.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? 
+ }; + let nmupdate_override_non_boot = { + use nexus_db_schema::schema::inv_mupdate_override_non_boot::dsl; + diesel::delete(dsl::inv_mupdate_override_non_boot.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; + // Remove rows associated with `OmicronSledConfig`s. let nomicron_sled_configs = { use nexus_db_schema::schema::inv_omicron_sled_config::dsl; @@ -1722,6 +1913,9 @@ impl DataStore { nlast_reconciliation_dataset_results, nlast_reconciliation_orphaned_datasets, nlast_reconciliation_zone_results, + nzone_manifest_zones, + nzone_manifest_non_boot, + nmupdate_override_non_boot, nomicron_sled_configs, nomicron_sled_config_disks, nomicron_sled_config_datasets, @@ -1756,6 +1950,9 @@ impl DataStore { nlast_reconciliation_orphaned_datasets, "nlast_reconciliation_zone_results" => nlast_reconciliation_zone_results, + "nzone_manifest_zones" => nzone_manifest_zones, + "nzone_manifest_non_boot" => nzone_manifest_non_boot, + "nmupdate_override_non_boot" => nmupdate_override_non_boot, "nomicron_sled_configs" => nomicron_sled_configs, "nomicron_sled_config_disks" => nomicron_sled_config_disks, "nomicron_sled_config_datasets" => nomicron_sled_config_datasets, @@ -2804,6 +3001,122 @@ impl DataStore { results }; + // Load zone_manifest_zone rows. 
+ let mut zone_manifest_artifacts_by_sled_id = { + use nexus_db_schema::schema::inv_zone_manifest_zone::dsl; + + let mut by_sled_id: BTreeMap< + SledUuid, + IdOrdMap, + > = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + dsl::inv_zone_manifest_zone, + (dsl::sled_id, dsl::zone_file_name), + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvZoneManifestZone::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| { + (row.sled_id, row.zone_file_name.clone()) + }); + + for row in batch { + by_sled_id + .entry(row.sled_id.into()) + .or_default() + .insert_unique(row.into()) + .expect("database ensures the row is unique"); + } + } + + by_sled_id + }; + + // Load zone-manifest non-boot rows. + let mut zone_manifest_non_boot_by_sled_id = { + use nexus_db_schema::schema::inv_zone_manifest_non_boot::dsl; + + let mut by_sled_id: BTreeMap< + SledUuid, + IdOrdMap, + > = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + dsl::inv_zone_manifest_non_boot, + (dsl::sled_id, dsl::non_boot_zpool_id), + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvZoneManifestNonBoot::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| { + (row.sled_id, row.non_boot_zpool_id) + }); + + for row in batch { + by_sled_id + .entry(row.sled_id.into()) + .or_default() + .insert_unique(row.into()) + .expect("database ensures the row is unique"); + } + } + + by_sled_id + }; + + // Load mupdate-override non-boot rows. 
+ let mut mupdate_override_non_boot_by_sled_id = { + use nexus_db_schema::schema::inv_mupdate_override_non_boot::dsl; + + let mut by_sled_id: BTreeMap< + SledUuid, + IdOrdMap, + > = BTreeMap::new(); + + let mut paginator = Paginator::new(batch_size); + while let Some(p) = paginator.next() { + let batch = paginated_multicolumn( + dsl::inv_mupdate_override_non_boot, + (dsl::sled_id, dsl::non_boot_zpool_id), + &p.current_pagparams(), + ) + .filter(dsl::inv_collection_id.eq(db_id)) + .select(InvMupdateOverrideNonBoot::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + paginator = p.found_batch(&batch, &|row| { + (row.sled_id, row.non_boot_zpool_id) + }); + for row in batch { + by_sled_id + .entry(row.sled_id.into()) + .or_default() + .insert_unique(row.into()) + .expect("database ensures the row is unique"); + } + } + + by_sled_id + }; + // Now load the clickhouse keeper cluster memberships let clickhouse_keeper_cluster_membership = { use nexus_db_schema::schema::inv_clickhouse_keeper_membership::dsl; @@ -2910,6 +3223,12 @@ impl DataStore { }) .transpose()?; + let zone_image_resolver = s.zone_image_resolver.into_inventory( + zone_manifest_artifacts_by_sled_id.remove(&sled_id), + zone_manifest_non_boot_by_sled_id.remove(&sled_id), + mupdate_override_non_boot_by_sled_id.remove(&sled_id), + ); + let sled_agent = nexus_types::inventory::SledAgent { time_collected: s.time_collected, source: s.source, @@ -2944,6 +3263,7 @@ impl DataStore { ledgered_sled_config, reconciler_status, last_reconciliation, + zone_image_resolver, }; sled_agents.insert(sled_id, sled_agent); } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index a276a10b5bf..9409c6c9e1d 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -340,7 +340,7 @@ mod test { use nexus_db_lookup::LookupPath; use 
nexus_sled_agent_shared::inventory::{ Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, - SledRole, + SledRole, ZoneImageResolverInventory, }; use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; @@ -700,6 +700,7 @@ mod test { reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, last_reconciliation: None, + zone_image_resolver: ZoneImageResolverInventory::new_fake(), }, ) .unwrap(); diff --git a/nexus/db-schema/src/enums.rs b/nexus/db-schema/src/enums.rs index 6f8cd6f1774..f75d40beb3d 100644 --- a/nexus/db-schema/src/enums.rs +++ b/nexus/db-schema/src/enums.rs @@ -48,6 +48,7 @@ define_enums! { InstanceIntendedStateEnum => "instance_intended_state", InvConfigReconcilerStatusKindEnum => "inv_config_reconciler_status_kind", InvZoneImageSourceEnum => "inv_zone_image_source", + InvZoneManifestSourceEnum => "inv_zone_manifest_source", IpAttachStateEnum => "ip_attach_state", IpKindEnum => "ip_kind", IpPoolResourceTypeEnum => "ip_pool_resource_type", diff --git a/nexus/db-schema/src/schema.rs b/nexus/db-schema/src/schema.rs index d570bfbb2ef..71bef9fcfe5 100644 --- a/nexus/db-schema/src/schema.rs +++ b/nexus/db-schema/src/schema.rs @@ -1601,6 +1601,15 @@ table! { reconciler_status_sled_config -> Nullable, reconciler_status_timestamp -> Nullable, reconciler_status_duration_secs -> Nullable, + + zone_manifest_boot_disk_path -> Text, + zone_manifest_source -> Nullable, + zone_manifest_mupdate_id -> Nullable, + zone_manifest_boot_disk_error -> Nullable, + + mupdate_override_boot_disk_path -> Text, + mupdate_override_id -> Nullable, + mupdate_override_boot_disk_error -> Nullable, } } @@ -1653,6 +1662,40 @@ table! { } } +table! { + inv_zone_manifest_zone (inv_collection_id, sled_id, zone_file_name) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + zone_file_name -> Text, + path -> Text, + expected_size -> Int8, + expected_sha256 -> Text, + error -> Nullable, + } +} + +table! 
{ + inv_zone_manifest_non_boot (inv_collection_id, sled_id, non_boot_zpool_id) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + non_boot_zpool_id -> Uuid, + path -> Text, + is_valid -> Bool, + message -> Text, + } +} + +table! { + inv_mupdate_override_non_boot (inv_collection_id, sled_id, non_boot_zpool_id) { + inv_collection_id -> Uuid, + sled_id -> Uuid, + non_boot_zpool_id -> Uuid, + path -> Text, + is_valid -> Bool, + message -> Text, + } +} + table! { inv_physical_disk (inv_collection_id, sled_id, slot) { inv_collection_id -> Uuid, diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 444aac265de..420c4b8e54d 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -10,6 +10,7 @@ workspace = true [dependencies] anyhow.workspace = true base64.workspace = true +camino.workspace = true chrono.workspace = true clickhouse-admin-keeper-client.workspace = true clickhouse-admin-server-client.workspace = true @@ -19,6 +20,7 @@ gateway-client.workspace = true gateway-messages.workspace = true gateway-types.workspace = true id-map.workspace = true +iddqd.workspace = true nexus-sled-agent-shared.workspace = true nexus-types.workspace = true omicron-common.workspace = true @@ -26,9 +28,12 @@ omicron-uuid-kinds.workspace = true reqwest.workspace = true serde_json.workspace = true sled-agent-client.workspace = true +sled-agent-types.workspace = true +sled-agent-zone-images-examples.workspace = true slog.workspace = true strum.workspace = true thiserror.workspace = true +tufaceous-artifact.workspace = true typed-rng.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 7c75c94de84..e786817d63a 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -539,6 +539,7 @@ impl CollectionBuilder { ledgered_sled_config: inventory.ledgered_sled_config, reconciler_status: inventory.reconciler_status, 
last_reconciliation: inventory.last_reconciliation, + zone_image_resolver: inventory.zone_image_resolver, }; if let Some(previous) = self.sleds.get(&sled_id) { diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index e8a4f632341..e1351443c20 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -6,6 +6,7 @@ use crate::CollectionBuilder; use crate::now_db_precision; +use camino::Utf8Path; use clickhouse_admin_types::ClickhouseKeeperClusterMembership; use clickhouse_admin_types::KeeperId; use gateway_client::types::PowerState; @@ -14,6 +15,7 @@ use gateway_client::types::SpComponentCaboose; use gateway_client::types::SpState; use gateway_client::types::SpType; use gateway_types::rot::RotSlot; +use iddqd::id_ord_map; use nexus_sled_agent_shared::inventory::Baseboard; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; @@ -25,6 +27,7 @@ use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZonesConfig; use nexus_sled_agent_shared::inventory::OrphanedDataset; use nexus_sled_agent_shared::inventory::SledRole; +use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use nexus_types::inventory::BaseboardId; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::RotPage; @@ -41,6 +44,26 @@ use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; +use sled_agent_types::zone_images::MupdateOverrideNonBootInfo; +use sled_agent_types::zone_images::MupdateOverrideNonBootMismatch; +use sled_agent_types::zone_images::MupdateOverrideNonBootResult; +use sled_agent_types::zone_images::MupdateOverrideReadError; +use sled_agent_types::zone_images::MupdateOverrideStatus; +use sled_agent_types::zone_images::ResolverStatus; +use 
sled_agent_types::zone_images::ZoneManifestNonBootInfo; +use sled_agent_types::zone_images::ZoneManifestNonBootMismatch; +use sled_agent_types::zone_images::ZoneManifestNonBootResult; +use sled_agent_types::zone_images::ZoneManifestReadError; +use sled_agent_types::zone_images::ZoneManifestStatus; +use sled_agent_zone_images_examples::BOOT_PATHS; +use sled_agent_zone_images_examples::NON_BOOT_2_PATHS; +use sled_agent_zone_images_examples::NON_BOOT_2_UUID; +use sled_agent_zone_images_examples::NON_BOOT_3_PATHS; +use sled_agent_zone_images_examples::NON_BOOT_3_UUID; +use sled_agent_zone_images_examples::NON_BOOT_PATHS; +use sled_agent_zone_images_examples::NON_BOOT_UUID; +use sled_agent_zone_images_examples::WriteInstallDatasetContext; +use sled_agent_zone_images_examples::dataset_missing_error; use std::sync::Arc; use std::time::Duration; use strum::IntoEnumIterator; @@ -476,6 +499,10 @@ pub fn representative() -> Representative { zpools, datasets, Some(sled14), + zone_image_resolver(ZoneImageResolverExampleKind::Success { + deserialized_zone_manifest: true, + has_mupdate_override: true, + }), ), ) .unwrap(); @@ -504,6 +531,10 @@ pub fn representative() -> Representative { vec![], vec![], Some(sled16), + zone_image_resolver(ZoneImageResolverExampleKind::Success { + deserialized_zone_manifest: false, + has_mupdate_override: false, + }), ), ) .unwrap(); @@ -527,6 +558,13 @@ pub fn representative() -> Representative { vec![], vec![], Some(sled17), + // Simulate a mismatch in this case with the mupdate override + // being present. There's one case that's unexplored: mismatch + // with no mupdate override. But to express that case we would + // need an additional fifth sled. + zone_image_resolver(ZoneImageResolverExampleKind::Mismatch { + has_mupdate_override: true, + }), ), ) .unwrap(); @@ -550,6 +588,8 @@ pub fn representative() -> Representative { // We only have omicron zones for three sleds so report no sled // config here. None, + // Simulate an error here. 
+ zone_image_resolver(ZoneImageResolverExampleKind::Error), ), ) .unwrap(); @@ -634,6 +674,162 @@ pub fn rot_page(unique: &str) -> RotPage { } } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ZoneImageResolverExampleKind { + /// Success, with or without treating the manifest as deserialized and the + /// mupdate override being present. + Success { deserialized_zone_manifest: bool, has_mupdate_override: bool }, + + /// The zone manifest is successfully read but doesn't match entries on + /// disk. + Mismatch { has_mupdate_override: bool }, + + /// Errors while reading the zone manifest and mupdate override status. + Error, +} + +/// Generate an example zone image resolver inventory. +pub fn zone_image_resolver( + kind: ZoneImageResolverExampleKind, +) -> ZoneImageResolverInventory { + let dir_path = Utf8Path::new("/some/path"); + + // Create a bunch of contexts. + let mut cx = WriteInstallDatasetContext::new_basic(); + + let mut invalid_cx = WriteInstallDatasetContext::new_basic(); + invalid_cx.make_error_cases(); + + // Determine the zone manifest and mupdate override results for the boot + // disk. + let (boot_zm_result, boot_override_result) = match kind { + ZoneImageResolverExampleKind::Success { + deserialized_zone_manifest, + has_mupdate_override, + } => { + if !deserialized_zone_manifest { + cx.write_zone_manifest_to_disk(false); + } + let zm_result = Ok( + cx.expected_result(&dir_path.join(&BOOT_PATHS.install_dataset)) + ); + let override_result = + Ok(has_mupdate_override.then(|| cx.override_info())); + (zm_result, override_result) + } + ZoneImageResolverExampleKind::Mismatch { has_mupdate_override } => { + // In this case, the zone manifest result is generated using the + // invalid (mismatched) context. 
+ let zm_result = Ok(invalid_cx + .expected_result(&dir_path.join(&BOOT_PATHS.install_dataset))); + let override_result = + Ok(has_mupdate_override.then(|| cx.override_info())); + (zm_result, override_result) + } + ZoneImageResolverExampleKind::Error => { + // Use the invalid context to generate an error. + let zm_result = Err(ZoneManifestReadError::InstallMetadata( + dataset_missing_error( + &dir_path.join(&BOOT_PATHS.install_dataset), + ), + )); + let override_result = + Err(MupdateOverrideReadError::InstallMetadata( + dataset_missing_error( + &dir_path.join(&BOOT_PATHS.install_dataset), + ), + )); + (zm_result, override_result) + } + }; + + // Generate a status struct first. + let status = ResolverStatus { + zone_manifest: ZoneManifestStatus { + boot_disk_path: dir_path.join(&BOOT_PATHS.zones_json), + boot_disk_result: boot_zm_result, + non_boot_disk_metadata: id_ord_map! { + // Non-boot disk metadata that matches. + ZoneManifestNonBootInfo { + zpool_id: NON_BOOT_UUID, + dataset_dir: dir_path.join(&NON_BOOT_PATHS.install_dataset), + path: dir_path.join(&NON_BOOT_PATHS.zones_json), + // XXX Technically, if the boot disk had an error, this + // can't be Matches. We choose to punt on this issue because + // the conversion to the inventory type squishes down + // errors into a string. + result: ZoneManifestNonBootResult::Matches( + cx.expected_result( + &dir_path.join(&NON_BOOT_PATHS.install_dataset) + ) + ), + }, + // Non-boot disk mismatch (zones different + errors). + ZoneManifestNonBootInfo { + zpool_id: NON_BOOT_2_UUID, + dataset_dir: dir_path.join(&NON_BOOT_2_PATHS.install_dataset), + path: dir_path.join(&NON_BOOT_2_PATHS.zones_json), + result: ZoneManifestNonBootResult::Mismatch( + ZoneManifestNonBootMismatch::ValueMismatch { + non_boot_disk_result: invalid_cx.expected_result( + &dir_path.join(&NON_BOOT_2_PATHS.install_dataset), + ), + }, + ), + }, + // Non-boot disk mismatch (error reading zone manifest). 
+ ZoneManifestNonBootInfo { + zpool_id: NON_BOOT_3_UUID, + dataset_dir: dir_path.join(&NON_BOOT_3_PATHS.install_dataset), + path: dir_path.join(&NON_BOOT_3_PATHS.zones_json), + result: ZoneManifestNonBootResult::ReadError( + dataset_missing_error( + &dir_path.join(&NON_BOOT_3_PATHS.install_dataset), + ).into(), + ), + }, + }, + }, + mupdate_override: MupdateOverrideStatus { + boot_disk_path: dir_path.join(&BOOT_PATHS.mupdate_override_json), + boot_disk_override: boot_override_result, + non_boot_disk_overrides: id_ord_map! { + // Non-boot disk mupdate overrides that match. + MupdateOverrideNonBootInfo { + zpool_id: NON_BOOT_UUID, + path: dir_path.join(&NON_BOOT_PATHS.mupdate_override_json), + // XXX Technically, if the boot disk had an error, this + // can't be Matches. We choose to punt on this issue because + // the conversion to the inventory type squishes down errors + // into a string. + result: MupdateOverrideNonBootResult::MatchesPresent, + }, + // Non-boot disk mupdate overrides that have a mismatch. + MupdateOverrideNonBootInfo { + zpool_id: NON_BOOT_2_UUID, + path: dir_path.join(&NON_BOOT_2_PATHS.mupdate_override_json), + result: MupdateOverrideNonBootResult::Mismatch( + MupdateOverrideNonBootMismatch::BootPresentOtherAbsent, + ), + }, + // Non-boot disk updates (error reading zone manifest). + MupdateOverrideNonBootInfo { + zpool_id: NON_BOOT_3_UUID, + path: dir_path.join(&NON_BOOT_3_PATHS.mupdate_override_json), + result: MupdateOverrideNonBootResult::ReadError( + dataset_missing_error( + &dir_path.join(&NON_BOOT_3_PATHS.install_dataset), + ).into(), + ), + }, + }, + }, + }; + + status.to_inventory() +} + +#[expect(clippy::too_many_arguments)] pub fn sled_agent( sled_id: SledUuid, baseboard: Baseboard, @@ -642,6 +838,7 @@ pub fn sled_agent( zpools: Vec, datasets: Vec, ledgered_sled_config: Option, + zone_image_resolver: ZoneImageResolverInventory, ) -> Inventory { // Assume the `ledgered_sled_config` was reconciled successfully. 
let last_reconciliation = ledgered_sled_config.clone().map(|config| { @@ -684,5 +881,6 @@ pub fn sled_agent( ledgered_sled_config, reconciler_status, last_reconciliation, + zone_image_resolver, } } diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index abd9e647e49..ac679ef3bc5 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -23,6 +23,7 @@ use nexus_sled_agent_shared::inventory::InventoryDisk; use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::SledRole; +use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use nexus_types::deployment::ClickhousePolicy; use nexus_types::deployment::CockroachDbClusterVersion; use nexus_types::deployment::CockroachDbSettings; @@ -853,6 +854,8 @@ impl Sled { sled_config, ), ), + // XXX: return something more reasonable here? + zone_image_resolver: ZoneImageResolverInventory::new_fake(), } }; @@ -1001,6 +1004,7 @@ impl Sled { ledgered_sled_config: inv_sled_agent.ledgered_sled_config.clone(), reconciler_status: inv_sled_agent.reconciler_status.clone(), last_reconciliation: inv_sled_agent.last_reconciliation.clone(), + zone_image_resolver: inv_sled_agent.zone_image_resolver.clone(), }; Sled { diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index bb92f937f01..5f1215ec5f4 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -2319,6 +2319,82 @@ fn after_148_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { }) } +fn before_151_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { + Box::pin(async move { + // Create some fake inventory data to test the zone image resolver migration. + // Insert a sled agent record without the new zone image resolver columns. 
+ let sled_id = Uuid::new_v4(); + let inv_collection_id = Uuid::new_v4(); + + ctx.client + .batch_execute(&format!( + " + INSERT INTO omicron.public.inv_sled_agent + (inv_collection_id, time_collected, source, sled_id, sled_agent_ip, + sled_agent_port, sled_role, usable_hardware_threads, usable_physical_ram, + reservoir_size, reconciler_status_kind) + VALUES + ('{inv_collection_id}', now(), 'test-source', '{sled_id}', '192.168.1.1', + 8080, 'gimlet', 32, 68719476736, 1073741824, 'not-yet-run'); + ", + inv_collection_id = inv_collection_id, + sled_id = sled_id + )) + .await + .expect("inserted pre-migration inv_sled_agent data"); + }) +} + +fn after_151_0_0<'a>(ctx: &'a MigrationContext<'a>) -> BoxFuture<'a, ()> { + Box::pin(async move { + // Verify that the zone image resolver columns have been added with + // correct defaults. + let rows = ctx + .client + .query( + "SELECT zone_manifest_boot_disk_path, zone_manifest_source, + zone_manifest_mupdate_id, zone_manifest_boot_disk_error, + mupdate_override_boot_disk_path, + mupdate_override_id, mupdate_override_boot_disk_error + FROM omicron.public.inv_sled_agent + ORDER BY time_collected", + &[], + ) + .await + .expect("inserted post-migration inv_sled_agent data"); + + assert_eq!(rows.len(), 1); + let row = &rows[0]; + + // Check that path fields have the expected default message. + let zone_manifest_path: String = + row.get("zone_manifest_boot_disk_path"); + let mupdate_override_path: String = + row.get("mupdate_override_boot_disk_path"); + assert_eq!(zone_manifest_path, "old-collection-data-missing"); + assert_eq!(mupdate_override_path, "old-collection-data-missing"); + + // Check that the zone manifest and mupdate override source fields are + // NULL. 
+ let zone_manifest_source: Option = + row.get("zone_manifest_source"); + assert_eq!(zone_manifest_source, None); + let zone_manifest_id: Option = + row.get("zone_manifest_mupdate_id"); + let mupdate_override_id: Option = row.get("mupdate_override_id"); + assert_eq!(zone_manifest_id, None); + assert_eq!(mupdate_override_id, None); + + // Check that error fields have the expected default message. + let zone_manifest_error: String = + row.get("zone_manifest_boot_disk_error"); + let mupdate_override_error: String = + row.get("mupdate_override_boot_disk_error"); + assert_eq!(zone_manifest_error, "old collection, data missing"); + assert_eq!(mupdate_override_error, "old collection, data missing"); + }) +} + // Lazily initializes all migration checks. The combination of Rust function // pointers and async makes defining a static table fairly painful, so we're // using lazy initialization instead. @@ -2391,6 +2467,10 @@ fn get_migration_checks() -> BTreeMap { Version::new(148, 0, 0), DataMigrationFns::new().before(before_148_0_0).after(after_148_0_0), ); + map.insert( + Version::new(151, 0, 0), + DataMigrationFns::new().before(before_151_0_0).after(after_151_0_0), + ); map } diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index f8699b216cb..87dba052cbb 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -27,6 +27,7 @@ use nexus_sled_agent_shared::inventory::InventoryZpool; use nexus_sled_agent_shared::inventory::OmicronSledConfig; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use nexus_sled_agent_shared::inventory::SledRole; +use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; @@ -565,4 +566,5 @@ pub struct SledAgent { pub ledgered_sled_config: Option, pub reconciler_status: ConfigReconcilerInventoryStatus, pub 
last_reconciliation: Option, + pub zone_image_resolver: ZoneImageResolverInventory, } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 0ac6d4b04e7..0a845a1152e 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4950,6 +4950,9 @@ "usable_physical_ram": { "$ref": "#/components/schemas/ByteCount" }, + "zone_image_resolver": { + "$ref": "#/components/schemas/ZoneImageResolverInventory" + }, "zpools": { "type": "array", "items": { @@ -4968,6 +4971,7 @@ "sled_role", "usable_hardware_threads", "usable_physical_ram", + "zone_image_resolver", "zpools" ] }, @@ -5324,6 +5328,115 @@ } ] }, + "MupdateOverrideInventory": { + "type": "object", + "properties": { + "boot_disk_override": { + "description": "The boot disk override, or an error if it could not be parsed.", + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "type": "null" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "type": "string", + "enum": [ + null + ] + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + }, + "boot_disk_path": { + "description": "The path to the mupdate override JSON on the boot disk.", + "type": "string", + "format": "Utf8PathBuf" + }, + "non_boot_status": { + "title": "IdOrdMap", + "description": "Warnings about non-boot disks, if any.", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/MupdateOverrideNonBootInventory" + } + ], + "path": "iddqd::IdOrdMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/MupdateOverrideNonBootInventory" + }, + "uniqueItems": true + } + }, + "required": [ + "boot_disk_override", + "boot_disk_path", + "non_boot_status" + ] + }, + "MupdateOverrideNonBootInventory": { + "type": "object", + "properties": { + "is_valid": { 
+ "description": "Whether the status is valid.", + "type": "boolean" + }, + "message": { + "description": "A message describing the status. If `is_valid` is false, then this message describes the reason for the invalid status.", + "type": "string" + }, + "path": { + "description": "The path to the mupdate override JSON on the non-boot disk.", + "type": "string", + "format": "Utf8PathBuf" + }, + "zpool_id": { + "description": "The non-boot zpool ID.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForInternalZpoolKind" + } + ] + } + }, + "required": [ + "is_valid", + "message", + "path", + "zpool_id" + ] + }, "Name": { "title": "A name unique within the parent collection", "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", @@ -5629,6 +5742,50 @@ } ] }, + "OmicronZoneManifestSource": { + "description": "The source of truth for an Omicron zone manifest.", + "oneOf": [ + { + "description": "The manifest was written out by installinator and the mupdate process.", + "type": "object", + "properties": { + "mupdate_id": { + "description": "The UUID of the mupdate.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForMupdateKind" + } + ] + }, + "source": { + "type": "string", + "enum": [ + "installinator" + ] + } + }, + "required": [ + "mupdate_id", + "source" + ] + }, + { + "description": "The zone manifest was not found during the install process. 
A synthetic zone manifest was generated by Sled Agent by looking at all the `.tar.gz` files in the install dataset.", + "type": "object", + "properties": { + "source": { + "type": "string", + "enum": [ + "sled_agent" + ] + } + }, + "required": [ + "source" + ] + } + ] + }, "OmicronZoneType": { "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", "oneOf": [ @@ -7110,6 +7267,14 @@ "type": "string", "format": "uuid" }, + "TypedUuidForInternalZpoolKind": { + "type": "string", + "format": "uuid" + }, + "TypedUuidForMupdateKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForMupdateOverrideKind": { "type": "string", "format": "uuid" @@ -7549,6 +7714,117 @@ "vni" ] }, + "ZoneArtifactInventory": { + "type": "object", + "properties": { + "expected_hash": { + "description": "The expected hash of the file.", + "type": "string", + "format": "hex string (32 bytes)" + }, + "expected_size": { + "description": "The expected size of the file.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "file_name": { + "description": "The filename.", + "type": "string" + }, + "path": { + "description": "The full path to the file.", + "type": "string", + "format": "Utf8PathBuf" + }, + "status": { + "description": "The status.", + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "type": "null" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "type": "string", + "enum": [ + null + ] + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + } + }, + "required": [ + "expected_hash", + "expected_size", + "file_name", + "path", + "status" + ] + }, + "ZoneArtifactsInventory": { + "type": "object", + "properties": { + "artifacts": { + "title": "IdOrdMap", + 
"description": "The artifacts on disk.", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/ZoneArtifactInventory" + } + ], + "path": "iddqd::IdOrdMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneArtifactInventory" + }, + "uniqueItems": true + }, + "source": { + "description": "The manifest source.", + "allOf": [ + { + "$ref": "#/components/schemas/OmicronZoneManifestSource" + } + ] + } + }, + "required": [ + "artifacts", + "source" + ] + }, "ZoneBundleCause": { "description": "The reason or cause for a zone bundle, i.e., why it was created.", "oneOf": [ @@ -7633,6 +7909,138 @@ "version" ] }, + "ZoneImageResolverInventory": { + "description": "A simplified form of zone image resolver status.", + "type": "object", + "properties": { + "mupdate_override": { + "description": "The mupdate override status.", + "allOf": [ + { + "$ref": "#/components/schemas/MupdateOverrideInventory" + } + ] + }, + "zone_manifest": { + "description": "The zone manifest status.", + "allOf": [ + { + "$ref": "#/components/schemas/ZoneManifestInventory" + } + ] + } + }, + "required": [ + "mupdate_override", + "zone_manifest" + ] + }, + "ZoneManifestInventory": { + "type": "object", + "properties": { + "boot_disk_path": { + "description": "The path to the zone manifest file on the boot disk.", + "type": "string", + "format": "Utf8PathBuf" + }, + "manifest": { + "description": "The manifest read from disk.", + "x-rust-type": { + "crate": "std", + "parameters": [ + { + "$ref": "#/components/schemas/ZoneArtifactsInventory" + }, + { + "type": "string" + } + ], + "path": "::std::result::Result", + "version": "*" + }, + "oneOf": [ + { + "type": "object", + "properties": { + "ok": { + "$ref": "#/components/schemas/ZoneArtifactsInventory" + } + }, + "required": [ + "ok" + ] + }, + { + "type": "object", + "properties": { + "err": { + "type": "string" + } + }, + "required": [ + "err" + ] + } + ] + }, + 
"non_boot_status": { + "title": "IdOrdMap", + "description": "Warnings about non-boot disks, if any.", + "x-rust-type": { + "crate": "iddqd", + "parameters": [ + { + "$ref": "#/components/schemas/ZoneManifestNonBootInventory" + } + ], + "path": "iddqd::IdOrdMap", + "version": "*" + }, + "type": "array", + "items": { + "$ref": "#/components/schemas/ZoneManifestNonBootInventory" + }, + "uniqueItems": true + } + }, + "required": [ + "boot_disk_path", + "manifest", + "non_boot_status" + ] + }, + "ZoneManifestNonBootInventory": { + "type": "object", + "properties": { + "is_valid": { + "description": "Whether the status is valid.", + "type": "boolean" + }, + "message": { + "description": "A message describing the status. If `is_valid` is false, then this message describes the reason for the invalid status.", + "type": "string" + }, + "path": { + "description": "The path to the zone manifest JSON on the non-boot disk.", + "type": "string", + "format": "Utf8PathBuf" + }, + "zpool_id": { + "description": "The non-boot zpool ID.", + "allOf": [ + { + "$ref": "#/components/schemas/TypedUuidForInternalZpoolKind" + } + ] + } + }, + "required": [ + "is_valid", + "message", + "path", + "zpool_id" + ] + }, "ZpoolName": { "title": "The name of a Zpool", "description": "Zpool names are of the format ox{i,p}_. 
They are either Internal or External, and should be unique", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 0195bfd06cc..3a7c3fb0a02 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3611,6 +3611,11 @@ AS ENUM ( 'idle' ); +CREATE TYPE IF NOT EXISTS omicron.public.inv_zone_manifest_source AS ENUM ( + 'installinator', + 'sled-agent' +); + -- observations from and about sled agents CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( -- where this observation came from @@ -3661,6 +3666,35 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( -- only present if `reconciler_status_kind != 'not-yet-run'` reconciler_status_duration_secs FLOAT, + -- Columns making up the zone image resolver's zone manifest description: + -- + -- The path to the boot disk image file. + zone_manifest_boot_disk_path TEXT NOT NULL, + -- The source of the zone manifest on the boot disk: from installinator or + -- sled-agent (synthetic). NULL means there is an error reading the zone manifest. + zone_manifest_source omicron.public.inv_zone_manifest_source, + -- The mupdate ID that created the zone manifest if this is from installinator. If + -- this is NULL, then either the zone manifest is synthetic or there was an + -- error reading the zone manifest. + zone_manifest_mupdate_id UUID, + -- Message describing the status of the zone manifest on the boot disk. If + -- this is NULL, then the zone manifest was successfully read, and the + -- inv_zone_manifest_zone table has entries corresponding to the zone + -- manifest. + zone_manifest_boot_disk_error TEXT, + + -- Columns making up the zone image resolver's mupdate override description. + mupdate_override_boot_disk_path TEXT NOT NULL, + -- The ID of the mupdate override. NULL means either that the mupdate + -- override was not found or that we failed to read it -- the two cases are + -- differentiated by the presence of a non-NULL value in the + -- mupdate_override_boot_disk_error column. 
+ mupdate_override_id UUID, + -- Error reading the mupdate override, if any. If this is NULL then + -- the mupdate override was either successfully read or is not + -- present. + mupdate_override_boot_disk_error TEXT, + CONSTRAINT reconciler_status_sled_config_present_if_running CHECK ( (reconciler_status_kind = 'running' AND reconciler_status_sled_config IS NOT NULL) @@ -3678,6 +3712,38 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_sled_agent ( AND reconciler_status_duration_secs IS NOT NULL) ), + -- For the zone manifest, there are three valid states: + -- 1. Successfully read from installinator (has mupdate_id, no error) + -- 2. Synthetic from sled-agent (no mupdate_id, no error) + -- 3. Error reading (no mupdate_id, has error) + -- + -- This is equivalent to Result<OmicronZoneManifestSource, String>. + CONSTRAINT zone_manifest_consistency CHECK ( + (zone_manifest_source = 'installinator' + AND zone_manifest_mupdate_id IS NOT NULL + AND zone_manifest_boot_disk_error IS NULL) + OR (zone_manifest_source = 'sled-agent' + AND zone_manifest_mupdate_id IS NULL + AND zone_manifest_boot_disk_error IS NULL) + OR ( + zone_manifest_source IS NULL + AND zone_manifest_mupdate_id IS NULL + AND zone_manifest_boot_disk_error IS NOT NULL + ) + ), + + -- For the mupdate override, three states are valid: + -- 1. No override, no error + -- 2. Override, no error + -- 3. No override, error + -- + -- This is equivalent to Result<Option<MupdateOverrideUuid>, String>. + CONSTRAINT mupdate_override_consistency CHECK ( + (mupdate_override_id IS NULL + AND mupdate_override_boot_disk_error IS NOT NULL) + OR mupdate_override_boot_disk_error IS NULL + ), + PRIMARY KEY (inv_collection_id, sled_id) ); @@ -3892,6 +3958,87 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_last_reconciliation_zone_result ( PRIMARY KEY (inv_collection_id, sled_id, zone_id) ); +-- A table describing a single zone within a zone manifest collected by inventory.
+CREATE TABLE IF NOT EXISTS omicron.public.inv_zone_manifest_zone ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + + -- Zone file name, part of the primary key within this table. + zone_file_name TEXT NOT NULL, + + -- The full path to the file. + path TEXT NOT NULL, + + -- The expected file size. + expected_size INT8 NOT NULL, + + -- The expected hash. + expected_sha256 STRING(64) NOT NULL, + + -- The error while reading the zone or matching it to the manifest, if any. + -- NULL indicates success. + error TEXT, + + PRIMARY KEY (inv_collection_id, sled_id, zone_file_name) +); + +-- A table describing status for a single zone manifest on a non-boot disk +-- collected by inventory. +CREATE TABLE IF NOT EXISTS omicron.public.inv_zone_manifest_non_boot ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + + -- unique ID for this non-boot disk + non_boot_zpool_id UUID NOT NULL, + + -- The full path to the zone manifest. + path TEXT NOT NULL, + + -- Whether the non-boot disk is in a valid state. + is_valid BOOLEAN NOT NULL, + + -- A message attached to this disk. + message TEXT NOT NULL, + + PRIMARY KEY (inv_collection_id, sled_id, non_boot_zpool_id) +); + +-- A table describing status for a single mupdate override on a non-boot disk +-- collected by inventory. 
+CREATE TABLE IF NOT EXISTS omicron.public.inv_mupdate_override_non_boot ( + -- where this observation came from + -- (foreign key into `inv_collection` table) + inv_collection_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a sled will report an id that we don't know about) + sled_id UUID NOT NULL, + + -- unique id for this non-boot disk + non_boot_zpool_id UUID NOT NULL, + + -- The full path to the mupdate override file. + path TEXT NOT NULL, + + -- Whether the non-boot disk is in a valid state. + is_valid BOOLEAN NOT NULL, + + -- A message attached to this disk. + message TEXT NOT NULL, + + PRIMARY KEY (inv_collection_id, sled_id, non_boot_zpool_id) +); + CREATE TYPE IF NOT EXISTS omicron.public.zone_type AS ENUM ( 'boundary_ntp', 'clickhouse', @@ -5760,7 +5907,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '150.0.0', NULL) + (TRUE, NOW(), NOW(), '151.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/zone-image-resolver-inventory/up01.sql b/schema/crdb/zone-image-resolver-inventory/up01.sql new file mode 100644 index 00000000000..459e843534d --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up01.sql @@ -0,0 +1,5 @@ +-- Add the inv_zone_manifest_source enum. +CREATE TYPE IF NOT EXISTS inv_zone_manifest_source AS ENUM ( + 'installinator', + 'sled-agent' +); \ No newline at end of file diff --git a/schema/crdb/zone-image-resolver-inventory/up02.sql b/schema/crdb/zone-image-resolver-inventory/up02.sql new file mode 100644 index 00000000000..ef8dfb5fc2b --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up02.sql @@ -0,0 +1,9 @@ +-- Add zone image resolver columns to the sled inventory table. 
+ALTER TABLE omicron.public.inv_sled_agent + ADD COLUMN IF NOT EXISTS zone_manifest_boot_disk_path TEXT NOT NULL DEFAULT 'old-collection-data-missing', + ADD COLUMN IF NOT EXISTS zone_manifest_source inv_zone_manifest_source, + ADD COLUMN IF NOT EXISTS zone_manifest_mupdate_id UUID, + ADD COLUMN IF NOT EXISTS zone_manifest_boot_disk_error TEXT DEFAULT 'old collection, data missing', + ADD COLUMN IF NOT EXISTS mupdate_override_boot_disk_path TEXT NOT NULL DEFAULT 'old-collection-data-missing', + ADD COLUMN IF NOT EXISTS mupdate_override_id UUID, + ADD COLUMN IF NOT EXISTS mupdate_override_boot_disk_error TEXT DEFAULT 'old collection, data missing'; diff --git a/schema/crdb/zone-image-resolver-inventory/up03.sql b/schema/crdb/zone-image-resolver-inventory/up03.sql new file mode 100644 index 00000000000..0c713ee646b --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up03.sql @@ -0,0 +1,20 @@ +-- Add constraints for zone image resolver columns. +ALTER TABLE omicron.public.inv_sled_agent + ADD CONSTRAINT IF NOT EXISTS zone_manifest_consistency CHECK ( + (zone_manifest_source = 'installinator' + AND zone_manifest_mupdate_id IS NOT NULL + AND zone_manifest_boot_disk_error IS NULL) + OR (zone_manifest_source = 'sled-agent' + AND zone_manifest_mupdate_id IS NULL + AND zone_manifest_boot_disk_error IS NULL) + OR ( + zone_manifest_source IS NULL + AND zone_manifest_mupdate_id IS NULL + AND zone_manifest_boot_disk_error IS NOT NULL + ) + ), + ADD CONSTRAINT IF NOT EXISTS mupdate_override_consistency CHECK ( + (mupdate_override_id IS NULL + AND mupdate_override_boot_disk_error IS NOT NULL) + OR mupdate_override_boot_disk_error IS NULL + ); diff --git a/schema/crdb/zone-image-resolver-inventory/up04.sql b/schema/crdb/zone-image-resolver-inventory/up04.sql new file mode 100644 index 00000000000..ca429da251b --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up04.sql @@ -0,0 +1,12 @@ +-- Create table for zone manifest zone inventory. 
+CREATE TABLE IF NOT EXISTS omicron.public.inv_zone_manifest_zone ( + inv_collection_id UUID NOT NULL, + sled_id UUID NOT NULL, + zone_file_name TEXT NOT NULL, + path TEXT NOT NULL, + expected_size INT8 NOT NULL, + expected_sha256 STRING(64) NOT NULL, + error TEXT, + + PRIMARY KEY (inv_collection_id, sled_id, zone_file_name) +); diff --git a/schema/crdb/zone-image-resolver-inventory/up05.sql b/schema/crdb/zone-image-resolver-inventory/up05.sql new file mode 100644 index 00000000000..649eddf95bf --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up05.sql @@ -0,0 +1,11 @@ +-- Create table for zone manifest non-boot disk inventory. +CREATE TABLE IF NOT EXISTS omicron.public.inv_zone_manifest_non_boot ( + inv_collection_id UUID NOT NULL, + sled_id UUID NOT NULL, + non_boot_zpool_id UUID NOT NULL, + path TEXT NOT NULL, + is_valid BOOLEAN NOT NULL, + message TEXT NOT NULL, + + PRIMARY KEY (inv_collection_id, sled_id, non_boot_zpool_id) +); diff --git a/schema/crdb/zone-image-resolver-inventory/up06.sql b/schema/crdb/zone-image-resolver-inventory/up06.sql new file mode 100644 index 00000000000..77cf35d21bc --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up06.sql @@ -0,0 +1,11 @@ +-- Create table for mupdate override non-boot disk inventory. +CREATE TABLE IF NOT EXISTS omicron.public.inv_mupdate_override_non_boot ( + inv_collection_id UUID NOT NULL, + sled_id UUID NOT NULL, + non_boot_zpool_id UUID NOT NULL, + path TEXT NOT NULL, + is_valid BOOLEAN NOT NULL, + message TEXT NOT NULL, + + PRIMARY KEY (inv_collection_id, sled_id, non_boot_zpool_id) +); diff --git a/schema/crdb/zone-image-resolver-inventory/up07.sql b/schema/crdb/zone-image-resolver-inventory/up07.sql new file mode 100644 index 00000000000..dfa10c5b80a --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up07.sql @@ -0,0 +1,2 @@ +-- Remove default from zone_manifest_boot_disk_path. 
+ALTER TABLE omicron.public.inv_sled_agent ALTER COLUMN zone_manifest_boot_disk_path DROP DEFAULT; diff --git a/schema/crdb/zone-image-resolver-inventory/up08.sql b/schema/crdb/zone-image-resolver-inventory/up08.sql new file mode 100644 index 00000000000..6c36e81f0cc --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up08.sql @@ -0,0 +1,2 @@ +-- Remove default from mupdate_override_boot_disk_path. +ALTER TABLE omicron.public.inv_sled_agent ALTER COLUMN mupdate_override_boot_disk_path DROP DEFAULT; diff --git a/schema/crdb/zone-image-resolver-inventory/up09.sql b/schema/crdb/zone-image-resolver-inventory/up09.sql new file mode 100644 index 00000000000..5024cdd3f6b --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up09.sql @@ -0,0 +1,2 @@ +-- Remove default from zone_manifest_boot_disk_error. +ALTER TABLE omicron.public.inv_sled_agent ALTER COLUMN zone_manifest_boot_disk_error DROP DEFAULT; diff --git a/schema/crdb/zone-image-resolver-inventory/up10.sql b/schema/crdb/zone-image-resolver-inventory/up10.sql new file mode 100644 index 00000000000..06512aa2b55 --- /dev/null +++ b/schema/crdb/zone-image-resolver-inventory/up10.sql @@ -0,0 +1,2 @@ +-- Remove default from mupdate_override_boot_disk_error. 
+ALTER TABLE omicron.public.inv_sled_agent ALTER COLUMN mupdate_override_boot_disk_error DROP DEFAULT; diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 7aec5989b19..3732bca059a 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1155,6 +1155,7 @@ impl ServicePortBuilder { mod tests { use super::*; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; + use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; use omicron_common::address::IpRange; use omicron_common::api::external::ByteCount; use omicron_common::api::internal::shared::AllowedSourceIps; @@ -1378,6 +1379,7 @@ mod tests { ledgered_sled_config: None, reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, last_reconciliation: None, + zone_image_resolver: ZoneImageResolverInventory::new_fake(), }, is_scrimlet, )]; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 3b38b57dd71..55d5eb15e76 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1713,7 +1713,7 @@ mod test { use nexus_reconfigurator_blippy::{Blippy, BlippyReportSortKey}; use nexus_sled_agent_shared::inventory::{ Baseboard, ConfigReconcilerInventoryStatus, Inventory, InventoryDisk, - OmicronZoneType, SledRole, + OmicronZoneType, SledRole, ZoneImageResolverInventory, }; use omicron_common::{ address::{Ipv6Subnet, SLED_PREFIX, get_sled_address}, @@ -1761,6 +1761,7 @@ mod test { ledgered_sled_config: None, reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, last_reconciliation: None, + zone_image_resolver: ZoneImageResolverInventory::new_fake(), }, true, ) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f032c75aec8..6676193c028 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -3412,6 +3412,11 @@ impl ServiceManager { .await } + /// Returns a reference 
to the zone image resolver. + pub(crate) fn zone_image_resolver(&self) -> &ZoneImageSourceResolver { + &self.inner.zone_image_resolver + } + // Forcefully initialize a sled-local switch zone. // // This is a helper function for "ensure_switch_zone". diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 46fdaa6a356..3ded1197d25 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -26,7 +26,7 @@ use futures::Stream; use nexus_sled_agent_shared::inventory::{ ConfigReconcilerInventoryStatus, Inventory, InventoryDataset, InventoryDisk, InventoryZpool, OmicronSledConfig, OmicronZonesConfig, - SledRole, + SledRole, ZoneImageResolverInventory, }; use omicron_common::api::external::{ ByteCount, DiskState, Error, Generation, ResourceType, @@ -807,6 +807,8 @@ impl SledAgent { ledgered_sled_config: Some(sled_config), reconciler_status: ConfigReconcilerInventoryStatus::NotYetRun, last_reconciliation: None, + // TODO: simulate the zone image resolver with greater fidelity + zone_image_resolver: ZoneImageResolverInventory::new_fake(), }) } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 10739f05ecb..bf2c693b25e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -1135,6 +1135,8 @@ impl SledAgent { let reservoir_size = self.inner.instances.reservoir_size(); let sled_role = if is_scrimlet { SledRole::Scrimlet } else { SledRole::Gimlet }; + let zone_image_resolver = + self.inner.services.zone_image_resolver().status().to_inventory(); let ReconcilerInventory { disks, @@ -1159,6 +1161,7 @@ impl SledAgent { ledgered_sled_config, reconciler_status, last_reconciliation, + zone_image_resolver, }) } diff --git a/sled-agent/types/src/zone_images.rs b/sled-agent/types/src/zone_images.rs index 05d4a96e36c..823dd0e10d5 100644 --- a/sled-agent/types/src/zone_images.rs +++ b/sled-agent/types/src/zone_images.rs @@ -6,6 +6,14 @@ use std::{fmt, fs::FileType, io, sync::Arc}; 
use camino::Utf8PathBuf; use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; +use nexus_sled_agent_shared::inventory::MupdateOverrideInfoInventory; +use nexus_sled_agent_shared::inventory::MupdateOverrideInventory; +use nexus_sled_agent_shared::inventory::MupdateOverrideNonBootInventory; +use nexus_sled_agent_shared::inventory::ZoneArtifactInventory; +use nexus_sled_agent_shared::inventory::ZoneArtifactsInventory; +use nexus_sled_agent_shared::inventory::ZoneImageResolverInventory; +use nexus_sled_agent_shared::inventory::ZoneManifestInventory; +use nexus_sled_agent_shared::inventory::ZoneManifestNonBootInventory; use omicron_common::update::{ MupdateOverrideInfo, OmicronZoneManifest, OmicronZoneManifestSource, }; @@ -25,6 +33,16 @@ pub struct ResolverStatus { pub mupdate_override: MupdateOverrideStatus, } +impl ResolverStatus { + /// Convert this status to the inventory format. + pub fn to_inventory(&self) -> ZoneImageResolverInventory { + ZoneImageResolverInventory { + zone_manifest: self.zone_manifest.to_inventory(), + mupdate_override: self.mupdate_override.to_inventory(), + } + } +} + /// Describes the current state of zone manifests. #[derive(Clone, Debug)] pub struct ZoneManifestStatus { @@ -40,6 +58,33 @@ pub struct ZoneManifestStatus { pub non_boot_disk_metadata: IdOrdMap, } +impl ZoneManifestStatus { + /// Convert this status to the inventory format. 
+ pub fn to_inventory(&self) -> ZoneManifestInventory { + let manifest = match &self.boot_disk_result { + Ok(artifacts_result) => Ok(artifacts_result.to_inventory()), + Err(err) => Err(err.to_string()), + }; + + let non_boot_status = self + .non_boot_disk_metadata + .iter() + .map(|info| ZoneManifestNonBootInventory { + zpool_id: info.zpool_id, + path: info.path.clone(), + is_valid: info.result.is_valid(), + message: info.result.display().to_string(), + }) + .collect(); + + ZoneManifestInventory { + boot_disk_path: self.boot_disk_path.clone(), + manifest, + non_boot_status, + } + } +} + /// The result of reading artifacts from an install dataset. /// /// This may or may not be valid, depending on the status of the artifacts. See @@ -63,6 +108,14 @@ impl ZoneManifestArtifactsResult { artifacts: &self.data, } } + + /// Convert this result to the inventory format. + pub fn to_inventory(&self) -> ZoneArtifactsInventory { + let artifacts = + self.data.iter().map(|artifact| artifact.to_inventory()).collect(); + + ZoneArtifactsInventory { source: self.manifest.source, artifacts } + } } pub struct ZoneManifestArtifactsDisplay<'a> { @@ -127,6 +180,31 @@ impl ZoneManifestArtifactResult { pub fn display(&self) -> ZoneManifestArtifactDisplay<'_> { ZoneManifestArtifactDisplay { artifact: self } } + + /// Convert this result to inventory format. 
+ pub fn to_inventory(&self) -> ZoneArtifactInventory { + let status = match &self.status { + ArtifactReadResult::Valid => Ok(()), + ArtifactReadResult::Mismatch { actual_size, actual_hash } => { + Err(format!( + "size/hash mismatch: expected {} bytes/{}, got {} bytes/{}", + self.expected_size, + self.expected_hash, + actual_size, + actual_hash + )) + } + ArtifactReadResult::Error(err) => Err(err.to_string()), + }; + + ZoneArtifactInventory { + file_name: self.file_name.clone(), + path: self.path.clone(), + expected_size: self.expected_size, + expected_hash: self.expected_hash, + status, + } + } } impl IdOrdItem for ZoneManifestArtifactResult { @@ -244,6 +322,11 @@ impl ZoneManifestNonBootResult { } } + /// Returns a displayable representation of this result. + pub fn display(&self) -> ZoneManifestNonBootDisplay<'_> { + ZoneManifestNonBootDisplay { result: self } + } + fn log_to(&self, log: &slog::Logger) { match self { Self::Matches(result) => { @@ -293,6 +376,47 @@ impl ZoneManifestNonBootResult { } } +pub struct ZoneManifestNonBootDisplay<'a> { + result: &'a ZoneManifestNonBootResult, +} + +impl fmt::Display for ZoneManifestNonBootDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.result { + ZoneManifestNonBootResult::Matches(result) => { + if result.is_valid() { + write!(f, "valid zone manifest: {}", result.display()) + } else { + write!(f, "invalid zone manifest: {}", result.display()) + } + } + ZoneManifestNonBootResult::Mismatch(mismatch) => match mismatch { + ZoneManifestNonBootMismatch::ValueMismatch { + non_boot_disk_result, + } => { + write!( + f, + "contents differ from boot disk: {}", + non_boot_disk_result.display() + ) + } + ZoneManifestNonBootMismatch::BootDiskReadError { + non_boot_disk_result, + } => { + write!( + f, + "boot disk read error, non-boot disk: {}", + non_boot_disk_result.display() + ) + } + }, + ZoneManifestNonBootResult::ReadError(error) => { + write!(f, "read error: {}", error) + } + } + } +} + 
#[derive(Clone, Debug, PartialEq)] pub enum ZoneManifestNonBootMismatch { /// The file's contents differ between the boot disk and the other disk. @@ -321,6 +445,36 @@ pub struct MupdateOverrideStatus { pub non_boot_disk_overrides: IdOrdMap, } +impl MupdateOverrideStatus { + /// Convert this status to inventory format. + pub fn to_inventory(&self) -> MupdateOverrideInventory { + let boot_disk_override = match &self.boot_disk_override { + Ok(Some(override_info)) => Ok(Some(MupdateOverrideInfoInventory { + mupdate_override_id: override_info.mupdate_uuid, + })), + Ok(None) => Ok(None), + Err(err) => Err(err.to_string()), + }; + + let non_boot_status = self + .non_boot_disk_overrides + .iter() + .map(|info| MupdateOverrideNonBootInventory { + zpool_id: info.zpool_id, + path: info.path.clone(), + is_valid: info.result.is_valid(), + message: info.result.display().to_string(), + }) + .collect(); + + MupdateOverrideInventory { + boot_disk_path: self.boot_disk_path.clone(), + boot_disk_override, + non_boot_status, + } + } +} + /// Describes the result of reading a mupdate override file from a non-boot disk. #[derive(Clone, Debug, PartialEq)] pub struct MupdateOverrideNonBootInfo { @@ -398,6 +552,77 @@ pub enum MupdateOverrideNonBootResult { ReadError(MupdateOverrideReadError), } +impl MupdateOverrideNonBootResult { + /// Returns true if the status is considered to be valid. + pub fn is_valid(&self) -> bool { + match self { + MupdateOverrideNonBootResult::MatchesPresent + | MupdateOverrideNonBootResult::MatchesAbsent => true, + MupdateOverrideNonBootResult::Mismatch(_) + | MupdateOverrideNonBootResult::ReadError(_) => false, + } + } + + /// Returns a displayable representation of this result. 
+ pub fn display(&self) -> MupdateOverrideNonBootDisplay<'_> { + MupdateOverrideNonBootDisplay { result: self } + } +} + +pub struct MupdateOverrideNonBootDisplay<'a> { + result: &'a MupdateOverrideNonBootResult, +} + +impl fmt::Display for MupdateOverrideNonBootDisplay<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.result { + MupdateOverrideNonBootResult::MatchesPresent + | MupdateOverrideNonBootResult::MatchesAbsent => { + // Matching results are displayed too (see `to_inventory`). + write!(f, "matches boot disk") + } + MupdateOverrideNonBootResult::Mismatch(mismatch) => match mismatch { + MupdateOverrideNonBootMismatch::BootPresentOtherAbsent => { + write!( + f, + "boot disk has override but non-boot disk does not" + ) + } + MupdateOverrideNonBootMismatch::BootAbsentOtherPresent { + non_boot_disk_info, + } => { + write!( + f, + "non-boot disk has override ({:?}) but boot disk does not", + non_boot_disk_info + ) + } + MupdateOverrideNonBootMismatch::ValueMismatch { + non_boot_disk_info, + } => { + write!( + f, + "boot disk and non-boot disk have different overrides (non-boot: {:?})", + non_boot_disk_info + ) + } + MupdateOverrideNonBootMismatch::BootDiskReadError { + non_boot_disk_info, + } => { + write!( + f, + "error reading boot disk, non-boot disk override: {:?}", + non_boot_disk_info + ) + } + }, + MupdateOverrideNonBootResult::ReadError(err) => { + write!(f, "read error: {}", err) + } + } + } +} + /// Describes a mismatch between the boot disk and a non-boot disk. #[derive(Clone, Debug, PartialEq)] pub enum MupdateOverrideNonBootMismatch { diff --git a/sled-agent/zone-images/src/source_resolver.rs b/sled-agent/zone-images/src/source_resolver.rs index 9eaa83192a7..328973cd0dd 100644 --- a/sled-agent/zone-images/src/source_resolver.rs +++ b/sled-agent/zone-images/src/source_resolver.rs @@ -340,4 +340,37 @@ mod tests { logctx.cleanup_successful(); } + + /// Test that the resolver status can be converted to inventory format.
+ #[test] + fn resolver_status_to_inventory() { + let logctx = LogContext::new( + "resolver_status_to_inventory", + &ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Debug }, + ); + let dir = Utf8TempDir::new().unwrap(); + dir.child(&BOOT_PATHS.install_dataset).create_dir_all().unwrap(); + + let internal_disks_rx = + make_internal_disks_rx(dir.path(), BOOT_UUID, &[]); + let resolver = ZoneImageSourceResolver::new( + &logctx.log, + internal_disks_rx.current_with_boot_disk(), + ); + + let status = resolver.status(); + let inventory = status.to_inventory(); + + // Verify the conversion works + assert_eq!( + inventory.zone_manifest.boot_disk_path, + status.zone_manifest.boot_disk_path + ); + assert_eq!( + inventory.mupdate_override.boot_disk_path, + status.mupdate_override.boot_disk_path + ); + + logctx.cleanup_successful(); + } }