oxidecomputer · andrewjstone · Apr 17, 2025 · Apr 25, 2025 · Apr 25, 2025 · Apr 25, 2025
diff --git a/trust-quorum/src/coordinator_state.rs b/trust-quorum/src/coordinator_state.rs
diff --git a/trust-quorum/src/crypto.rs b/trust-quorum/src/crypto.rs
@@ -4,9 +4,13 @@
 
 //! Various cryptographic constructs used by trust quroum.
 
+use crate::{Epoch, Threshold};
 use bootstore::trust_quorum::RackSecret as LrtqRackSecret;
+use chacha20poly1305::{ChaCha20Poly1305, Key, KeyInit, aead, aead::Aead};
 use derive_more::From;
 use gfss::shamir::{self, CombineError, SecretShares, Share, SplitError};
+use hkdf::Hkdf;
+use omicron_uuid_kinds::{GenericUuid, RackUuid};
 use rand::RngCore;
 use rand::rngs::OsRng;
 use secrecy::{DebugSecret, ExposeSecret, Secret};
@@ -15,9 +19,7 @@ use sha3::{Digest, Sha3_256};
 use slog_error_chain::SlogInlineError;
 use std::fmt::Debug;
 use subtle::ConstantTimeEq;
-use zeroize::{Zeroize, ZeroizeOnDrop};
-
-use crate::Threshold;
+use zeroize::{Zeroize, ZeroizeOnDrop, Zeroizing};
 
 /// Each share contains a byte for the y-coordinate of 32 points on 32 different
 /// polynomials over Ed25519. All points share an x-coordinate, which is the 0th
@@ -203,6 +205,15 @@ impl RackSecret {
         let secret = shamir::compute_secret(shares)?.try_into()?;
         Ok(secret)
     }
+
+    /// Reconstruct the rack secret from borrowed shares. The cloned shares
+    /// live in a `Zeroizing` wrapper so they are wiped on every exit path.
+    pub fn reconstruct_from_iter<'a>(
+        shares: impl Iterator<Item = &'a Share>,
+    ) -> Result<ReconstructedRackSecret, RackSecretReconstructError> {
+        let shares = Zeroizing::new(shares.cloned().collect::<Vec<Share>>());
+        RackSecret::reconstruct(&shares)
+    }
 }
 
 impl DebugSecret for RackSecret {}
@@ -242,6 +253,63 @@ impl Default for Salt {
     }
 }
 
+/// Seal the previous rack secret under a key derived from the new one.
+///
+/// Returns the ciphertext together with the random salt generated for the
+/// key derivation. The derivation context also covers `rack_id`,
+/// `old_epoch`, and `new_epoch`.
+pub fn encrypt_old_rack_secret(
+    old_rack_secret: ReconstructedRackSecret,
+    new_rack_secret: ReconstructedRackSecret,
+    rack_id: RackUuid,
+    old_epoch: Epoch,
+    new_epoch: Epoch,
+) -> aead::Result<(EncryptedRackSecret, Salt)> {
+    let salt = Salt::new();
+    let aead_cipher = derive_encryption_key_for_rack_secret(
+        new_rack_secret,
+        salt,
+        rack_id,
+        old_epoch,
+        new_epoch,
+    );
+
+    // The derived key encrypts only this one plaintext, so an all-zero
+    // nonce is sufficient.
+    let zero_nonce = [0u8; 12].into();
+    let plaintext: &[u8] = old_rack_secret.expose_secret().as_ref();
+    let ciphertext = aead_cipher.encrypt(&zero_nonce, plaintext)?;
+
+    Ok((EncryptedRackSecret(ciphertext), salt))
+}
+
+/// Derive a ChaCha20-Poly1305 cipher from `new_rack_secret` via
+/// HKDF-SHA3-256, binding the key to `salt`, `rack_id`, and both epochs.
+fn derive_encryption_key_for_rack_secret(
+    new_rack_secret: ReconstructedRackSecret,
+    salt: Salt,
+    rack_id: RackUuid,
+    old_epoch: Epoch,
+    new_epoch: Epoch,
+) -> ChaCha20Poly1305 {
+    let ikm = new_rack_secret.expose_secret();
+    let kdf = Hkdf::<Sha3_256>::new(Some(&salt.0[..]), ikm);
+
+    // The "info" string is context to bind the key to its purpose
+    let mut key = Zeroizing::new([0u8; 32]);
+    kdf.expand_multi_info(
+        &[
+            b"trust-quorum-v1-rack-secret",
+            rack_id.as_untyped_uuid().as_ref(),
+            &new_epoch.0.to_be_bytes(),
+            &old_epoch.0.to_be_bytes(),
+        ],
+        key.as_mut(),
+    )
+    .expect("32 bytes is a valid HKDF-SHA3-256 output length");
+    ChaCha20Poly1305::new(Key::from_slice(key.as_ref()))
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

diff --git a/trust-quorum/src/errors.rs b/trust-quorum/src/errors.rs
@@ -0,0 +1,103 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Various errors for the trust quorum APIs
+
+use crate::configuration::ConfigurationError;
+use crate::{Epoch, PlatformId, Threshold};
+use omicron_uuid_kinds::RackUuid;
+
+/// Errors that can occur while handling a commit.
+#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
+pub enum CommitError {
+    /// Wraps a [`MismatchedRackIdError`]; `#[from]` also marks the source.
+    #[error("invalid rack id")]
+    InvalidRackId(#[from] MismatchedRackIdError),
+
+    #[error("missing prepare msg")]
+    MissingPrepare,
+
+    #[error("prepare for a later configuration exists")]
+    OutOfOrderCommit,
+}
+
+/// Details reported when a sled is found to be decommissioned.
+#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
+#[error(
+    "sled was decommissioned on msg from {from:?} at epoch {epoch:?}: last prepared epoch = {last_prepared_epoch:?}"
+)]
+pub struct SledDecommissionedError {
+    pub from: PlatformId,
+    pub epoch: Epoch,
+    pub last_prepared_epoch: Option<Epoch>,
+}
+
+/// A rack id that differs from the expected one.
+#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
+#[error("mismatched rack id: expected {expected:?}, got {got:?}")]
+pub struct MismatchedRackIdError {
+    pub expected: RackUuid,
+    pub got: RackUuid,
+}
+
+/// Errors that can arise while handling a reconfiguration request.
+#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)]
+pub enum ReconfigurationError {
+    #[error("reconfiguration coordinator must be a member of the new group")]
+    CoordinatorMustBeAMemberOfNewGroup,
+
+    #[error("upgrade from LRTQ required")]
+    UpgradeFromLrtqRequired,
+
+    #[error(
+        "number of members: {num_members:?} must be greater than threshold: {threshold:?}"
+    )]
+    ThresholdMismatch { num_members: usize, threshold: Threshold },
+
+    #[error(
+        "invalid membership size: {0:?}: must be between 3 and 32 inclusive"
+    )]
+    InvalidMembershipSize(usize),
+
+    #[error(
+        "invalid threshold: {0:?}: threshold must be between 2 and 31 inclusive"
+    )]
+    InvalidThreshold(Threshold),
+
+    /// The node's last committed epoch differs from the message's.
+    #[error(
+        "Node has last committed epoch of {node_epoch:?}, message contains {msg_epoch:?}"
+    )]
+    LastCommittedEpochMismatch {
+        node_epoch: Option<Epoch>,
+        msg_epoch: Option<Epoch>,
+    },
+
+    /// The sled already prepared at an epoch at or above the requested one.
+    #[error(
+        "sled has already prepared a request at epoch {existing:?}, and cannot prepare another at a smaller or equivalent epoch {new:?}"
+    )]
+    PreparedEpochMismatch { existing: Epoch, new: Epoch },
+
+    /// Wraps a [`MismatchedRackIdError`]; `#[from]` also marks the source.
+    #[error("invalid rack id in reconfigure msg")]
+    InvalidRackId(#[from] MismatchedRackIdError),
+
+    /// Wraps a [`SledDecommissionedError`]; `#[from]` also marks the source.
+    #[error("cannot reconfigure a decommissioned sled")]
+    DecommissionedSled(#[from] SledDecommissionedError),
+
+    /// The message targets an epoch older than the one in progress.
+    #[error(
+        "reconfiguration in progress at epoch {current_epoch:?}: cannot reconfigure for older epoch {msg_epoch:?}"
+    )]
+    ReconfigurationInProgress { current_epoch: Epoch, msg_epoch: Epoch },
+
+    #[error("mismatched reconfiguration requests for epoch {0:?}")]
+    MismatchedReconfigurationForSameEpoch(Epoch),
+
+    /// Forwards a [`ConfigurationError`], displaying it transparently.
+    #[error(transparent)]
+    Configuration(#[from] ConfigurationError),
+}
diff --git a/trust-quorum/src/lib.rs b/trust-quorum/src/lib.rs
@@ -15,6 +15,7 @@ use serde::{Deserialize, Serialize};
 mod configuration;
 mod coordinator_state;
 pub(crate) mod crypto;
+pub(crate) mod errors;
 mod messages;
 mod node;
 mod persistent_state;
@@ -40,6 +41,13 @@ pub use persistent_state::{PersistentState, PersistentStateSummary};
 )]
 pub struct Epoch(pub u64);
 
+impl Epoch {
+    /// Return the next epoch, leaving `self` as is. Panics on overflow.
+    pub fn inc(&self) -> Epoch {
+        Epoch(self.0.checked_add(1).expect("epoch overflowed u64"))
+    }
+}
+
 /// The number of shares required to reconstruct the rack secret
 ///
 /// Typically referred to as `k` in the docs