Skip to content

Commit

Permalink
Allow device to have multiple reasons for being in maintenance mode
Browse files Browse the repository at this point in the history
This change allows the device to have multiple reasons for being in
maintenance mode. This is useful when multiple conditions are met that
require the device to be in maintenance mode. For example, if the TPM
is in error and the device disk is full, the device can be in
maintenance mode for both reasons. Clearing one of the reasons will not
take the device out of maintenance mode if there are other reasons for
it to be in maintenance mode.

Signed-off-by: Shahriyar Jalayeri <shahriyar@zededa.com>
  • Loading branch information
shjala committed Jan 22, 2025
1 parent 2c2d305 commit b49e579
Showing 3 changed files with 50 additions and 52 deletions.
5 changes: 1 addition & 4 deletions pkg/pillar/cmd/nodeagent/handletimers.go
Original file line number Diff line number Diff line change
@@ -131,12 +131,9 @@ func handleRebootOnVaultLocked(ctxPtr *nodeagentContext) {
scheduleNodeOperation(ctxPtr, errStr, types.BootReasonVaultFailure,
types.DeviceOperationReboot)
} else {
log.Noticef("Setting %s",
types.MaintenanceModeReasonVaultLockedUp)
// there is no image update in progress, this happened after a normal
// reboot. enter maintenance mode
ctxPtr.maintMode = true
ctxPtr.maintModeReason = types.MaintenanceModeReasonVaultLockedUp
setMaintenanceModeReason(ctxPtr, types.MaintenanceModeReasonVaultLockedUp)
publishNodeAgentStatus(ctxPtr)
}
} else {
66 changes: 30 additions & 36 deletions pkg/pillar/cmd/nodeagent/nodeagent.go
Original file line number Diff line number Diff line change
@@ -780,14 +780,8 @@ func handleVaultStatusImpl(ctxArg interface{}, key string,
if vault.ConversionComplete {
ctx.vaultOperational = types.TS_ENABLED
// Do we need to clear maintenance?
if ctx.maintMode &&
ctx.maintModeReason == types.MaintenanceModeReasonVaultLockedUp {
log.Noticef("Clearing %s",
types.MaintenanceModeReasonVaultLockedUp)
ctx.maintMode = false
ctx.maintModeReason = types.MaintenanceModeReasonNone
publishNodeAgentStatus(ctx)
}
maybeClearMaintenanceModeReason(ctx, types.MaintenanceModeReasonVaultLockedUp)
publishNodeAgentStatus(ctx)
} else {
ctx.vaultOperational = types.TS_NONE
}
@@ -811,34 +805,19 @@ func handleVolumeMgrStatusImpl(ctxArg interface{}, key string,

ctx := ctxArg.(*nodeagentContext)
vms := statusArg.(types.VolumeMgrStatus)
changed := false
// This RemainingSpace takes into account the space reserved for
// /persist/newlog plus the percentage/minimum reserved for the rest
// of EVE-OS. Thus it can never go negative, but zero means that
// we neither have space to download new images nor space to deploy
// a tiny app instance.
if vms.RemainingSpace == 0 {
log.Warnf("MaintenanceMode due to no remaining diskspace")
// Do not overwrite a vault maintenance mode
if !ctx.maintMode {
log.Noticef("Setting %s",
types.MaintenanceModeReasonNoDiskSpace)
ctx.maintModeReason = types.MaintenanceModeReasonNoDiskSpace
ctx.maintMode = true
changed = true
}
// Add to maintenance mode reasons
setMaintenanceModeReason(ctx, types.MaintenanceModeReasonNoDiskSpace)
publishNodeAgentStatus(ctx)
} else {
// Do we need to clear maintenance?
if ctx.maintMode &&
ctx.maintModeReason == types.MaintenanceModeReasonNoDiskSpace {
log.Noticef("Clearing %s",
types.MaintenanceModeReasonNoDiskSpace)
ctx.maintMode = false
ctx.maintModeReason = types.MaintenanceModeReasonNone
changed = true
}
}
if changed {
maybeClearMaintenanceModeReason(ctx, types.MaintenanceModeReasonNoDiskSpace)
publishNodeAgentStatus(ctx)
}
}
@@ -880,16 +859,31 @@ func handleTpmStatusImpl(ctxArg interface{}, key string,

if tpm.Status == types.MaintenanceModeReasonTpmEncFailure {
log.Errorf("handleTpmStatusImpl: TPM manager reported TPM error : %s", tpm.Error)
log.Noticef("Setting %s", types.MaintenanceModeReasonTpmEncFailure)
ctx.maintMode = true
ctx.maintModeReason = types.MaintenanceModeReasonTpmEncFailure
setMaintenanceModeReason(ctx, types.MaintenanceModeReasonTpmEncFailure)
publishNodeAgentStatus(ctx)
} else {
if ctx.maintMode && ctx.maintModeReason == types.MaintenanceModeReasonTpmEncFailure {
log.Noticef("Clearing %s", types.MaintenanceModeReasonTpmEncFailure)
ctx.maintMode = false
ctx.maintModeReason = types.MaintenanceModeReasonNone
publishNodeAgentStatus(ctx)
}
maybeClearMaintenanceModeReason(ctx, types.MaintenanceModeReasonTpmEncFailure)
publishNodeAgentStatus(ctx)
}
}

// maybeClearMaintenanceModeReason removes reason from ctx via
// clearMaintenanceModeReason, but only when every bit of reason is currently
// present in ctx.maintModeReason. If the reason is not (fully) set, the
// context is left untouched.
func maybeClearMaintenanceModeReason(ctx *nodeagentContext, reason types.MaintenanceModeReason) {
	// Guard: nothing to do unless all bits of reason are recorded.
	if ctx.maintModeReason&reason != reason {
		return
	}
	clearMaintenanceModeReason(ctx, reason)
}

// setMaintenanceModeReason logs the reason, ORs it into the set of reasons
// held in ctx.maintModeReason (keeping any reasons already recorded), and
// marks the context as being in maintenance mode.
func setMaintenanceModeReason(ctx *nodeagentContext, reason types.MaintenanceModeReason) {
	log.Noticef("Setting %s", reason)
	// Merge rather than overwrite so that previously set reasons survive.
	ctx.maintModeReason = ctx.maintModeReason | reason
	ctx.maintMode = true
}

// clearMaintenanceModeReason logs the reason and removes its bits from
// ctx.maintModeReason. ctx.maintMode is switched off only when the remaining
// value equals types.MaintenanceModeReasonNone; while any other reason is
// still recorded, ctx.maintMode is left as it was.
func clearMaintenanceModeReason(ctx *nodeagentContext, reason types.MaintenanceModeReason) {
	log.Noticef("Clearing %s", reason)

	// Bit-clear (AND NOT) drops only the bits belonging to reason.
	remaining := ctx.maintModeReason &^ reason
	ctx.maintModeReason = remaining
	if remaining != types.MaintenanceModeReasonNone {
		// Other reasons are still active; stay in maintenance mode.
		return
	}

	log.Notice("No reason to be in maintenance mode, clearing maintenance mode")
	ctx.maintMode = false
}
31 changes: 19 additions & 12 deletions pkg/pillar/types/zedagenttypes.go
Original file line number Diff line number Diff line change
@@ -401,20 +401,27 @@ const (

// String returns the verbose equivalent of MaintenanceModeReason code
func (mmr MaintenanceModeReason) String() string {
switch mmr {
case MaintenanceModeReasonNone:
reason := []string{}
if mmr == MaintenanceModeReasonNone {
return "MaintenanceModeReasonNone"
case MaintenanceModeReasonUserRequested:
return "MaintenanceModeReasonUserRequested"
case MaintenanceModeReasonVaultLockedUp:
return "MaintenanceModeReasonVaultLockedUp"
case MaintenanceModeReasonNoDiskSpace:
return "MaintenanceModeReasonNoDiskSpace"
case MaintenanceModeReasonTpmEncFailure:
return "MaintenanceModeReasonTpmEncFailure"
default:
return fmt.Sprintf("Unknown MaintenanceModeReason %d", mmr)
}
if (mmr & MaintenanceModeReasonUserRequested) == MaintenanceModeReasonUserRequested {
reason = append(reason, "MaintenanceModeReasonUserRequested")
}
if (mmr & MaintenanceModeReasonVaultLockedUp) == MaintenanceModeReasonVaultLockedUp {
reason = append(reason, "MaintenanceModeReasonVaultLockedUp")
}
if (mmr & MaintenanceModeReasonNoDiskSpace) == MaintenanceModeReasonNoDiskSpace {
reason = append(reason, "MaintenanceModeReasonNoDiskSpace")
}
if (mmr & MaintenanceModeReasonTpmEncFailure) == MaintenanceModeReasonTpmEncFailure {
reason = append(reason, "MaintenanceModeReasonTpmEncFailure")
}
if len(reason) == 0 {
return "Unknown MaintenanceModeReason"
}

return strings.Join(reason, "|")
}

// NodeAgentStatus :

0 comments on commit b49e579

Please sign in to comment.