-
Notifications
You must be signed in to change notification settings - Fork 43
Trust quorum: reconfiguration and commit behavior #8052
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,9 +4,13 @@ | |
|
||
//! Various cryptographic constructs used by trust quorum. | ||
|
||
use crate::{Epoch, Threshold}; | ||
use bootstore::trust_quorum::RackSecret as LrtqRackSecret; | ||
use chacha20poly1305::{ChaCha20Poly1305, Key, KeyInit, aead, aead::Aead}; | ||
use derive_more::From; | ||
use gfss::shamir::{self, CombineError, SecretShares, Share, SplitError}; | ||
use hkdf::Hkdf; | ||
use omicron_uuid_kinds::{GenericUuid, RackUuid}; | ||
use rand::RngCore; | ||
use rand::rngs::OsRng; | ||
use secrecy::{DebugSecret, ExposeSecret, Secret}; | ||
|
@@ -15,9 +19,7 @@ use sha3::{Digest, Sha3_256}; | |
use slog_error_chain::SlogInlineError; | ||
use std::fmt::Debug; | ||
use subtle::ConstantTimeEq; | ||
use zeroize::{Zeroize, ZeroizeOnDrop}; | ||
|
||
use crate::Threshold; | ||
use zeroize::{Zeroize, ZeroizeOnDrop, Zeroizing}; | ||
|
||
/// Each share contains a byte for the y-coordinate of 32 points on 32 different | ||
/// polynomials over Ed25519. All points share an x-coordinate, which is the 0th | ||
|
@@ -203,6 +205,15 @@ impl RackSecret { | |
let secret = shamir::compute_secret(shares)?.try_into()?; | ||
Ok(secret) | ||
} | ||
|
||
/// Reconstruct the rack secret from an iterator of borrowed shares. | ||
/// | ||
/// The shares are cloned into a temporary `Vec` that is handed to | ||
/// [`RackSecret::reconstruct`] by reference. That temporary copy is zeroized | ||
/// before returning, regardless of whether reconstruction succeeded. | ||
pub fn reconstruct_from_iter<'a>( | ||
shares: impl Iterator<Item = &'a Share>, | ||
) -> Result<ReconstructedRackSecret, RackSecretReconstructError> { | ||
let mut shares: Vec<Share> = shares.cloned().collect(); | ||
let res = RackSecret::reconstruct(&shares); | ||
// Wipe the cloned share material before the result is handed back. | ||
shares.zeroize(); | ||
res | ||
} | ||
} | ||
|
||
impl DebugSecret for RackSecret {} | ||
|
@@ -242,6 +253,63 @@ impl Default for Salt { | |
} | ||
} | ||
|
||
/// Encrypt the old rack secret with a key derived from the new rack secret. | ||
/// | ||
/// A random salt is generated and returned along with the encrypted secret. Key | ||
/// derivation context includes `rack_id`, `old_epoch`, and `new_epoch`. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Returns an `aead` error if the cipher fails to encrypt the old rack | ||
/// secret. | ||
pub fn encrypt_old_rack_secret( | ||
old_rack_secret: ReconstructedRackSecret, | ||
new_rack_secret: ReconstructedRackSecret, | ||
rack_id: RackUuid, | ||
old_epoch: Epoch, | ||
new_epoch: Epoch, | ||
) -> aead::Result<(EncryptedRackSecret, Salt)> { | ||
let salt = Salt::new(); | ||
let cipher = derive_encryption_key_for_rack_secret( | ||
new_rack_secret, | ||
salt, | ||
rack_id, | ||
old_epoch, | ||
new_epoch, | ||
); | ||
|
||
// The key is derived from a salt generated for this call, so it is only | ||
// ever used to encrypt this one plaintext. An AEAD nonce only has to be | ||
// unique per key; since this (key, nonce) pair is never reused, a fixed | ||
// nonce of all zeroes is all that's required. | ||
Comment on lines
+276
to
+277
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this the case? I'm curious but not sure how the second statement follows from the first. |
||
let nonce = [0u8; 12].into(); | ||
let encrypted_rack_secret = EncryptedRackSecret( | ||
cipher.encrypt(&nonce, old_rack_secret.expose_secret().as_ref())?, | ||
); | ||
|
||
Ok((encrypted_rack_secret, salt)) | ||
} | ||
|
||
/// Derive the ChaCha20-Poly1305 cipher used to encrypt an old rack secret. | ||
/// | ||
/// HKDF over SHA3-256 is keyed with the new rack secret and `salt`, then | ||
/// expanded into a 32-byte key using an info string made up of a fixed label, | ||
/// the rack id, and the big-endian bytes of the new and old epochs. The | ||
/// intermediate key buffer is wrapped in `Zeroizing`. | ||
fn derive_encryption_key_for_rack_secret( | ||
new_rack_secret: ReconstructedRackSecret, | ||
salt: Salt, | ||
rack_id: RackUuid, | ||
old_epoch: Epoch, | ||
new_epoch: Epoch, | ||
) -> ChaCha20Poly1305 { | ||
let prk = Hkdf::<Sha3_256>::new( | ||
Some(&salt.0[..]), | ||
new_rack_secret.expose_secret(), | ||
); | ||
|
||
// The "info" string is context to bind the key to its purpose: it is mixed | ||
// into the HKDF expansion, so a different label, rack id, or epoch pair | ||
// produces a different key from the same rack secret and salt. | ||
// | ||
// NOTE(review): the `unwrap` below presumably cannot fail because HKDF | ||
// expansion only errors when the requested output is too long for the | ||
// hash, and 32 bytes is within that limit - confirm against the `hkdf` docs. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does "bind" mean here? |
||
let mut key = Zeroizing::new([0u8; 32]); | ||
prk.expand_multi_info( | ||
&[ | ||
b"trust-quorum-v1-rack-secret", | ||
rack_id.as_untyped_uuid().as_ref(), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Huh I guess we should probably impl |
||
&new_epoch.0.to_be_bytes(), | ||
&old_epoch.0.to_be_bytes(), | ||
], | ||
key.as_mut(), | ||
) | ||
.unwrap(); | ||
ChaCha20Poly1305::new(Key::from_slice(key.as_ref())) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
use super::*; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
// This Source Code Form is subject to the terms of the Mozilla Public | ||
// License, v. 2.0. If a copy of the MPL was not distributed with this | ||
// file, You can obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
//! Various errors for the trust quorum APIs | ||
|
||
use crate::configuration::ConfigurationError; | ||
use crate::{Epoch, PlatformId, Threshold}; | ||
use omicron_uuid_kinds::RackUuid; | ||
|
||
/// Error conditions reported for a commit. | ||
/// | ||
/// NOTE(review): presumably returned when a node handles a commit request; | ||
/// the call site is not visible here - confirm. | ||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)] | ||
pub enum CommitError { | ||
/// Wraps a [`MismatchedRackIdError`]; convertible from it via `From`. | ||
#[error("invalid rack id")] | ||
InvalidRackId( | ||
#[from] | ||
#[source] | ||
MismatchedRackIdError, | ||
), | ||
|
||
/// The prepare message needed for the commit is missing. | ||
#[error("missing prepare msg")] | ||
MissingPrepare, | ||
|
||
/// A prepare for a later configuration already exists. | ||
#[error("prepare for a later configuration exists")] | ||
OutOfOrderCommit, | ||
} | ||
|
||
/// Error reporting that a sled was decommissioned, carrying the details of | ||
/// the message that was being processed. | ||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)] | ||
#[error( | ||
"sled was decommissioned on msg from {from:?} at epoch {epoch:?}: last prepared epoch = {last_prepared_epoch:?}" | ||
)] | ||
pub struct SledDecommissionedError { | ||
/// Sender of the message, as reported in the error text. | ||
pub from: PlatformId, | ||
/// Epoch reported in the error text. | ||
pub epoch: Epoch, | ||
/// Last prepared epoch, if there is one. | ||
pub last_prepared_epoch: Option<Epoch>, | ||
} | ||
|
||
/// Error reporting that a rack id did not match the expected one. | ||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)] | ||
#[error("mismatched rack id: expected {expected:?}, got {got:?}")] | ||
pub struct MismatchedRackIdError { | ||
/// The rack id that was expected. | ||
pub expected: RackUuid, | ||
/// The rack id that was actually received. | ||
pub got: RackUuid, | ||
} | ||
|
||
/// Error conditions reported for a reconfiguration. | ||
/// | ||
/// Each variant's `#[error]` message describes the condition. Three variants | ||
/// wrap other error types and are convertible from them via `From`: | ||
/// `InvalidRackId`, `DecommissionedSled`, and `Configuration`. | ||
#[derive(Debug, Clone, thiserror::Error, PartialEq, Eq)] | ||
pub enum ReconfigurationError { | ||
#[error("reconfiguration coordinator must be a member of the new group")] | ||
CoordinatorMustBeAMemberOfNewGroup, | ||
|
||
#[error("upgrade from LRTQ required")] | ||
UpgradeFromLrtqRequired, | ||
|
||
#[error( | ||
"number of members: {num_members:?} must be greater than threshold: {threshold:?}" | ||
)] | ||
ThresholdMismatch { num_members: usize, threshold: Threshold }, | ||
|
||
#[error( | ||
"invalid membership size: {0:?}: must be between 3 and 32 inclusive" | ||
)] | ||
InvalidMembershipSize(usize), | ||
|
||
#[error( | ||
"invalid threshold: {0:?}: threshold must be between 2 and 31 inclusive" | ||
)] | ||
InvalidThreshold(Threshold), | ||
|
||
#[error( | ||
"Node has last committed epoch of {node_epoch:?}, message contains {msg_epoch:?}" | ||
)] | ||
LastCommittedEpochMismatch { | ||
node_epoch: Option<Epoch>, | ||
msg_epoch: Option<Epoch>, | ||
}, | ||
|
||
#[error( | ||
"sled has already prepared a request at epoch {existing:?}, and cannot prepare another at a smaller or equivalent epoch {new:?}" | ||
)] | ||
PreparedEpochMismatch { existing: Epoch, new: Epoch }, | ||
|
||
/// Wraps a [`MismatchedRackIdError`]. | ||
#[error("invalid rack id in reconfigure msg")] | ||
InvalidRackId( | ||
#[from] | ||
#[source] | ||
MismatchedRackIdError, | ||
), | ||
|
||
/// Wraps a [`SledDecommissionedError`]. | ||
#[error("cannot reconfigure a decommissioned sled")] | ||
DecommissionedSled( | ||
#[from] | ||
#[source] | ||
SledDecommissionedError, | ||
), | ||
#[error( | ||
"reconfiguration in progress at epoch {current_epoch:?}: cannot reconfigure for older epoch {msg_epoch:?}" | ||
)] | ||
ReconfigurationInProgress { current_epoch: Epoch, msg_epoch: Epoch }, | ||
|
||
#[error("mismatched reconfiguration requests for epoch {0:?}")] | ||
MismatchedReconfigurationForSameEpoch(Epoch), | ||
|
||
/// Wraps a [`ConfigurationError`]; its message and source are forwarded | ||
/// unchanged (`#[error(transparent)]`). | ||
#[error(transparent)] | ||
Configuration(#[from] ConfigurationError), | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a reason this looks different from
reconstruct
above?