Skip to content

Commit

Permalink
adds new commands for deprovision and reboot
Browse files Browse the repository at this point in the history
  • Loading branch information
jarededwards committed Dec 12, 2024
1 parent 5e77a60 commit f3b903a
Show file tree
Hide file tree
Showing 12 changed files with 451 additions and 10 deletions.
174 changes: 174 additions & 0 deletions cmd/deprovision.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
package cmd

import (
"bytes"
"fmt"
"html/template"
"os"
"path/filepath"
"strings"

"github.com/konstructio/colony/internal/constants"
"github.com/konstructio/colony/internal/k8s"
"github.com/konstructio/colony/internal/logger"
"github.com/konstructio/colony/internal/utils"
"github.com/konstructio/colony/manifests"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// DeprovisionWorkflowRequest carries the values that are substituted into the
// wipe-disks workflow template when a machine is deprovisioned.
type DeprovisionWorkflowRequest struct {
	// Mac is the MAC address of the hardware being wiped; the deprovision
	// command fills it from the DHCP settings of the first interface.
	// RandomSuffix is the random string generated per command run; the same
	// value is later used to find the applied workflow.
	Mac, RandomSuffix string
}

// getDeprovisionCommand builds the "deprovision" cobra command.
//
// When run, the command performs these steps in order and stops at the first
// failure:
//
//  1. builds a Kubernetes client from the kubeconfig under the user's colony
//     directory and loads the dynamic mappings,
//  2. clears the iPXE settings on the Hardware object named by --hardware-id,
//  3. renders the wipe-disks workflow template, applies it and waits for it,
//  4. renders the IPMI power-cycle template, applies it and waits for it.
//
// The --hardware-id flag is required.
func getDeprovisionCommand() *cobra.Command {
	var hardwareID, bootDevice string
	var efiBoot, destroy bool

	// waitTimeout is handed to both wait helpers.
	// NOTE(review): presumably seconds — confirm against internal/k8s.
	const waitTimeout = 300

	deprovisionCmd := &cobra.Command{
		Use:   "deprovision",
		Short: "remove a hardware from your colony data center - very destructive",
		RunE: func(cmd *cobra.Command, _ []string) error {
			ctx := cmd.Context()
			log := logger.New(logger.Debug)

			// One suffix per run: it is rendered into both manifests and is
			// later used to look the created resources up again.
			randomSuffix := utils.RandomString(6)

			homeDir, err := os.UserHomeDir()
			if err != nil {
				return fmt.Errorf("error getting user home directory: %w", err)
			}

			k8sClient, err := k8s.New(log, filepath.Join(homeDir, constants.ColonyDir, constants.KubeconfigHostPath))
			if err != nil {
				return fmt.Errorf("failed to create k8s client: %w", err)
			}

			if err = k8sClient.LoadMappingsFromKubernetes(); err != nil {
				return fmt.Errorf("error loading dynamic mappings from kubernetes: %w", err)
			}

			log.Infof("deprovisioning hardware with id %q", hardwareID)
			log.Infof("boot device %q", bootDevice)
			log.Infof("efi boot %t", efiBoot)
			// NOTE(review): destroy is only logged here; nothing below acts on it yet.
			log.Infof("destroy %t", destroy)

			// TODO if the machine state is powered on, restart it so the workflow will run

			// TODO: POST to api to mark the hardware removed

			// Fetch the hardware and clear its iPXE settings.
			hw, err := k8sClient.HardwareRemoveIPXE(ctx, k8s.UpdateHardwareRequest{
				HardwareID: hardwareID,
				Namespace:  constants.ColonyNamespace,
				RemoveIpXE: true,
			})
			if err != nil {
				return fmt.Errorf("error removing ipxe from hardware: %w", err)
			}
			log.Infof("hardware: %v", hw)

			// The workflow template needs the MAC of the first interface;
			// fail with a clear error instead of panicking when it is absent.
			if len(hw.Spec.Interfaces) == 0 || hw.Spec.Interfaces[0].DHCP == nil {
				return fmt.Errorf("hardware %q has no interface with dhcp settings", hardwareID)
			}

			// Detokenize the wipe-disks workflow.
			// NOTE(review): the file imports html/template, which HTML-escapes
			// interpolated values; text/template looks like the better fit for
			// YAML manifests — confirm and switch the import.
			workflowFile, err := manifests.Workflow.ReadFile("workflow/wipe-disks.yaml.tmpl")
			if err != nil {
				return fmt.Errorf("error reading templates file: %w", err)
			}

			workflowManifest, err := renderDeprovisionManifest("wipe-disks", string(workflowFile), template.FuncMap{
				"replaceColonsWithHyphens": func(s string) string {
					return strings.ReplaceAll(s, ":", "-")
				},
			}, DeprovisionWorkflowRequest{
				Mac:          hw.Spec.Interfaces[0].DHCP.MAC,
				RandomSuffix: randomSuffix,
			})
			if err != nil {
				return err
			}

			log.Info(workflowManifest)

			ip, err := k8sClient.GetHardwareMachineRefFromSecretLabel(ctx, constants.ColonyNamespace, metav1.ListOptions{
				LabelSelector: fmt.Sprintf("colony.konstruct.io/hardware-id=%s", hardwareID),
			})
			if err != nil {
				return fmt.Errorf("error getting machine ref secret: %w", err)
			}

			// Label values are derived from the IP with dots replaced by hyphens.
			labelValue := strings.ReplaceAll(ip, ".", "-")

			// Author's note on this step: "NOT UNTIL WE'RE SURE" — this applies
			// the wipe-disks workflow.
			if err := k8sClient.ApplyManifests(ctx, []string{workflowManifest}); err != nil {
				return fmt.Errorf("error applying workflow: %w", err)
			}

			err = k8sClient.FetchAndWaitForWorkflow(ctx, k8s.WorkflowWaitRequest{
				LabelValue:   labelValue,
				Namespace:    constants.ColonyNamespace,
				WaitTimeout:  waitTimeout,
				RandomSuffix: randomSuffix,
			})
			if err != nil {
				return fmt.Errorf("error waiting for workflow: %w", err)
			}

			// Reboot: render and apply the IPMI power-cycle job.
			ipmiFile, err := manifests.IPMI.ReadFile("ipmi/ipmi-off-pxe-on.yaml.tmpl")
			if err != nil {
				return fmt.Errorf("error reading templates file: %w", err)
			}

			ipmiManifest, err := renderDeprovisionManifest("ipmi", string(ipmiFile), template.FuncMap{
				"replaceDotsWithDash": func(s string) string {
					return strings.ReplaceAll(s, ".", "-")
				},
			}, RufioPowerCycleRequest{
				IP:           ip,
				EFIBoot:      efiBoot,
				BootDevice:   bootDevice,
				RandomSuffix: randomSuffix,
			})
			if err != nil {
				return err
			}

			log.Info(ipmiManifest)

			if err := k8sClient.ApplyManifests(ctx, []string{ipmiManifest}); err != nil {
				return fmt.Errorf("error applying rufio job: %w", err)
			}

			err = k8sClient.FetchAndWaitForRufioJobs(ctx, k8s.RufioJobWaitRequest{
				LabelValue:   labelValue,
				Namespace:    constants.ColonyNamespace,
				WaitTimeout:  waitTimeout,
				RandomSuffix: randomSuffix,
			})
			if err != nil {
				return fmt.Errorf("error get rufio job: %w", err)
			}

			return nil
		},
	}

	deprovisionCmd.Flags().StringVar(&hardwareID, "hardware-id", "", "hardware id of the server to deprovision - WARNING: you can not recover this server")
	deprovisionCmd.Flags().StringVar(&bootDevice, "boot-device", "pxe", "the bootdev to set (pxe, bios) defaults to pxe")
	deprovisionCmd.Flags().BoolVar(&efiBoot, "efiBoot", true, "boot device option (uefi, legacy) defaults to uefi")
	deprovisionCmd.Flags().BoolVar(&destroy, "destroy", false, "whether to destroy the machine and its associated resources")

	// MarkFlagRequired only fails when the named flag is not defined; it is
	// defined a few lines above, so the error is deliberately discarded.
	_ = deprovisionCmd.MarkFlagRequired("hardware-id")

	return deprovisionCmd
}

// renderDeprovisionManifest parses text as a template called name, with funcs
// registered as helper functions, executes it against data and returns the
// rendered output.
func renderDeprovisionManifest(name, text string, funcs template.FuncMap, data any) (string, error) {
	tmpl, err := template.New(name).Funcs(funcs).Parse(text)
	if err != nil {
		return "", fmt.Errorf("error parsing template: %w", err)
	}

	var out bytes.Buffer
	if err := tmpl.Execute(&out, data); err != nil {
		return "", fmt.Errorf("error executing template: %w", err)
	}

	return out.String(), nil
}
7 changes: 3 additions & 4 deletions cmd/reboot.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"path/filepath"
"strings"

"github.com/konstructio/colony/configs"
"github.com/konstructio/colony/internal/constants"
"github.com/konstructio/colony/internal/k8s"
"github.com/konstructio/colony/internal/logger"
Expand All @@ -28,7 +27,7 @@ func getRebootCommand() *cobra.Command {

ctx := cmd.Context()
log := logger.New(logger.Debug)
log.Info("colony cli version: ", configs.Version)

homeDir, err := os.UserHomeDir()
if err != nil {
return fmt.Errorf("error getting user home directory: %w", err)
Expand Down Expand Up @@ -84,7 +83,7 @@ func getRebootCommand() *cobra.Command {
log.Info(outputBuffer.String())

if err := k8sClient.ApplyManifests(ctx, []string{outputBuffer.String()}); err != nil {
return fmt.Errorf("error applying rufiojob: %w", err)
return fmt.Errorf("error applying rufio job: %w", err)
}

err = k8sClient.FetchAndWaitForRufioJobs(ctx, k8s.RufioJobWaitRequest{
Expand All @@ -95,7 +94,7 @@ func getRebootCommand() *cobra.Command {
})

if err != nil {
return fmt.Errorf("error get machine: %w", err)
return fmt.Errorf("error get rufio job: %w", err)
}

return nil
Expand Down
9 changes: 8 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ func GetRootCommand() *cobra.Command {
SilenceErrors: true, // we print the errors ourselves on main
}

cmd.AddCommand(getDestroyCommand(), getInitCommand(), getAddIPMICommand(), getRebootCommand(), getVersionCommand(), getAssetsCommand())
cmd.AddCommand(
getDestroyCommand(),
getInitCommand(),
getAddIPMICommand(),
getRebootCommand(),
getVersionCommand(),
getAssetsCommand(),
getDeprovisionCommand())
return cmd
}
88 changes: 87 additions & 1 deletion internal/k8s/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ func (c *Client) waitForMachineReady(ctx context.Context, gvr schema.GroupVersio

err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Duration(timeoutSeconds)*time.Second, true, func(ctx context.Context) (bool, error) {
// Get the latest Machine object
m, err := c.dynamic.Resource(gvr).Namespace(namespace).Get(context.Background(), machineName, metav1.GetOptions{})
m, err := c.dynamic.Resource(gvr).Namespace(namespace).Get(ctx, machineName, metav1.GetOptions{})
if err != nil {
// If we couldn't connect, retry
if isNetworkingError(err) {
Expand Down Expand Up @@ -542,6 +542,13 @@ type RufioJobWaitRequest struct {
WaitTimeout int
}

// WorkflowWaitRequest describes which workflow FetchAndWaitForWorkflow should
// look up and how long it may wait for it.
type WorkflowWaitRequest struct {
	// LabelValue identifies the target for the caller; the deprovision command
	// passes the machine IP with dots replaced by hyphens.
	// Namespace is the namespace in which the workflow is looked up.
	// RandomSuffix is matched against the colony.konstruct.io/job-id label.
	LabelValue, Namespace, RandomSuffix string

	// WaitTimeout is forwarded to the lookup and wait helpers.
	// NOTE(review): presumably seconds — confirm against those helpers.
	WaitTimeout int
}

// ! refactor... this is so dupe
func (c *Client) FetchAndWaitForRufioJobs(ctx context.Context, job RufioJobWaitRequest) error {
c.logger.Infof("waiting for job %q in namespace %q", job.RandomSuffix, job.Namespace)
Expand Down Expand Up @@ -571,6 +578,85 @@ func (c *Client) FetchAndWaitForRufioJobs(ctx context.Context, job RufioJobWaitR
return nil
}

// FetchAndWaitForWorkflow looks up the workflow in workflow.Namespace whose
// colony.konstruct.io/job-id label equals workflow.RandomSuffix and then waits
// for it via waitWorkflowComplete.
//
// workflow.WaitTimeout is forwarded unchanged to both the lookup and the wait.
// workflow.LabelValue is not part of the lookup. It returns a wrapped error if
// the workflow cannot be found or the wait fails, and nil otherwise.
func (c *Client) FetchAndWaitForWorkflow(ctx context.Context, workflow WorkflowWaitRequest) error {
	c.logger.Infof("waiting for workflow %q in namespace %q", workflow.RandomSuffix, workflow.Namespace)

	// Same group/version/resource triple as spelling the three fields out by hand.
	gvr := v1alpha1.GroupVersion.WithResource("workflows")

	w, err := c.returnWorkflowObject(ctx, gvr, workflow.Namespace, workflow.WaitTimeout, metav1.ListOptions{
		LabelSelector: fmt.Sprintf("colony.konstruct.io/job-id=%s", workflow.RandomSuffix),
	})
	if err != nil {
		// Report the value that was actually searched for (the job-id suffix).
		return fmt.Errorf("error finding workflow %q: %w", workflow.RandomSuffix, err)
	}

	c.logger.Infof("workflow %q found in namespace %q", workflow.RandomSuffix, workflow.Namespace)

	if _, err = c.waitWorkflowComplete(ctx, gvr, w, workflow.WaitTimeout); err != nil {
		return fmt.Errorf("error waiting for workflow %q: %w", workflow.RandomSuffix, err)
	}

	c.logger.Infof("workflow %q in namespace %q is complete", workflow.RandomSuffix, workflow.Namespace)

	return nil
}

// UpdateHardwareRequest identifies the Hardware object that
// HardwareRemoveIPXE operates on.
type UpdateHardwareRequest struct {
	// HardwareID is the name of the Hardware object to fetch and update.
	HardwareID string
	// Namespace is the namespace the Hardware object lives in.
	Namespace string
	// RemoveIpXE is set by the caller to request removal of the iPXE settings.
	//
	// NOTE(review): CI (GitHub Actions / run-tests) reports
	// "var-naming: struct field RemoveIpXE should be RemoveIPXE (revive)" for
	// this field. The name is kept as-is because callers reference it;
	// renaming has to be done together with every call site.
	RemoveIpXE bool
}

// HardwareRemoveIPXE fetches the Hardware object named hardware.HardwareID from
// hardware.Namespace, replaces the iPXE settings of its first network
// interface with an empty value and writes the object back to the cluster.
//
// It returns the Hardware decoded from the object the API server returned for
// the update. An error is returned when the object cannot be fetched,
// converted or updated, or when it has no first interface with netboot
// settings to modify.
//
// NOTE(review): hardware.RemoveIpXE is not consulted here; the iPXE settings
// are cleared unconditionally.
func (c *Client) HardwareRemoveIPXE(ctx context.Context, hardware UpdateHardwareRequest) (*v1alpha1.Hardware, error) {
	c.logger.Infof("getting hardware %q in namespace %q", hardware.HardwareID, hardware.Namespace)

	// Same group/version/resource triple as spelling the three fields out by hand.
	gvr := v1alpha1.GroupVersion.WithResource("hardware")
	resource := c.dynamic.Resource(gvr).Namespace(hardware.Namespace)

	hw, err := resource.Get(ctx, hardware.HardwareID, metav1.GetOptions{})
	if err != nil {
		return nil, fmt.Errorf("error getting hardware %q: %w", hardware.HardwareID, err)
	}

	h := &v1alpha1.Hardware{}

	err = runtime.DefaultUnstructuredConverter.FromUnstructured(hw.UnstructuredContent(), h)
	if err != nil {
		return nil, fmt.Errorf("error converting unstructured to hardware: %w", err)
	}

	// Guard the accesses below: indexing an empty slice or dereferencing a nil
	// Netboot would panic.
	if len(h.Spec.Interfaces) == 0 {
		return nil, fmt.Errorf("hardware %q has no network interfaces", hardware.HardwareID)
	}
	if h.Spec.Interfaces[0].Netboot == nil {
		return nil, fmt.Errorf("hardware %q has no netboot settings on its first interface", hardware.HardwareID)
	}

	c.logger.Infof("hardware %q found, removing ipxe script ", hw.GetName())

	h.Spec.Interfaces[0].Netboot.IPXE = &v1alpha1.IPXE{}

	unstructuredObj, err := runtime.DefaultUnstructuredConverter.ToUnstructured(h)
	if err != nil {
		return nil, fmt.Errorf("error converting hardware to unstructured: %w", err)
	}

	obj, err := resource.Update(ctx, &unstructured.Unstructured{Object: unstructuredObj}, metav1.UpdateOptions{})
	if err != nil {
		return nil, fmt.Errorf("error updating hardware %q: %w", hardware.HardwareID, err)
	}

	c.logger.Infof("removed ipxe script from hardware %q", obj.GetName())

	// Decode the object returned by the update (not the one fetched earlier)
	// into a fresh value so the caller sees the state that was written.
	updated := &v1alpha1.Hardware{}

	err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.UnstructuredContent(), updated)
	if err != nil {
		return nil, fmt.Errorf("error converting updated unstructured to hardware: %w", err)
	}

	return updated, nil
}

func (c *Client) ListAssets(ctx context.Context) error {
// Set up columns for hardware table
columns := []table.Column{
Expand Down
2 changes: 1 addition & 1 deletion internal/k8s/rufiojobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func (c *Client) waitForJobComplete(ctx context.Context, gvr schema.GroupVersion

err := wait.PollUntilContextTimeout(ctx, 5*time.Second, time.Duration(timeoutSeconds)*time.Second, true, func(ctx context.Context) (bool, error) {
// Get the latest Machine object
j, err := c.dynamic.Resource(gvr).Namespace(namespace).Get(context.Background(), jobName, metav1.GetOptions{})
j, err := c.dynamic.Resource(gvr).Namespace(namespace).Get(ctx, jobName, metav1.GetOptions{})
if err != nil {
// If we couldn't connect, retry
if isNetworkingError(err) {
Expand Down
Loading

0 comments on commit f3b903a

Please sign in to comment.