Skip to content

Commit

Permalink
ciao-deploy: Add "unjoin" command to remove node from cluster
Browse files Browse the repository at this point in the history
Removes launcher and all assets from the worker node.

Signed-off-by: Rob Bradford <[email protected]>
  • Loading branch information
rbradford committed Aug 9, 2017
1 parent b8d8052 commit 15fb864
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 1 deletion.
12 changes: 11 additions & 1 deletion ciao-deploy/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,17 @@ ciao-deploy join ciao-cn01 ciao-cn02 ciao-cn03

## Teardown

Tearing down the cluster is not currently implemented.
To remove ciao from the worker nodes in the cluster, use the
`ciao-deploy unjoin` command:

```
ciao-deploy unjoin ciao-nn ciao-cn01 ciao-cn02 ciao-cn03
```

The same command can be used for both network and compute nodes without
differentiation.

Teardown of the master node is not currently supported.

## Support

Expand Down
67 changes: 67 additions & 0 deletions ciao-deploy/cmd/unjoin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright © 2017 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"context"
"fmt"
"os"
"os/signal"
"os/user"
"syscall"

"github.com/01org/ciao/ciao-deploy/deploy"
"github.com/spf13/cobra"
)

// unjoinCmd implements "ciao-deploy unjoin". Every host named on the command
// line is handed to deploy.TeardownNodes; the process then exits with status 1
// if that call reports an error and with status 0 otherwise.
var unjoinCmd = &cobra.Command{
	Use:   "unjoin <hosts>",
	Short: "Remove the specified nodes from the cluster",
	Long: `Remove the nodes from the cluster. Removing certificates and
uninstalling software.`,
	// At least one host must be supplied.
	Args: cobra.MinimumNArgs(1),
	Run: func(cmd *cobra.Command, args []string) {
		// SIGINT/SIGTERM are turned into a cancellation of the context that
		// is passed down to the teardown.
		signals := make(chan os.Signal, 1)
		signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)

		ctx, cancel := context.WithCancel(context.Background())
		defer cancel()

		go func() {
			<-signals
			cancel()
		}()

		if err := deploy.TeardownNodes(ctx, sshUser, args); err != nil {
			fmt.Fprintf(os.Stderr, "Error unprovisioning nodes: %v\n", err)
			os.Exit(1)
		}
		os.Exit(0)
	},
}

// init registers the unjoin command on the root command and declares its
// --user flag.
func init() {
	// The flag defaults to the name of the user running the tool; if that
	// lookup fails the default is left as the empty string.
	var defaultUser string
	if me, err := user.Current(); err == nil {
		defaultUser = me.Username
	}

	RootCmd.AddCommand(unjoinCmd)
	unjoinCmd.Flags().StringVar(&sshUser, "user", defaultUser, "User to SSH as")
}
79 changes: 79 additions & 0 deletions ciao-deploy/deploy/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"os"
"path"
"sync"
"time"

"github.com/01org/ciao/ssntp"
"github.com/pkg/errors"
Expand Down Expand Up @@ -185,3 +186,81 @@ func SetupNodes(ctx context.Context, sshUser string, networkNode bool, hosts []s
wg.Wait()
return nil
}

// teardownNode removes ciao-launcher from a single host by running a fixed
// sequence of commands over SSH as sshUser: stop the service, delete its
// systemd unit file and reload systemd, delete the certificates and the PKI
// directory, run a launcher hard reset, and finally delete the launcher
// binary. The first failing mandatory step aborts the sequence and its error
// is returned wrapped with a description of that step.
func teardownNode(ctx context.Context, hostname string, sshUser string) error {
	const tool = "ciao-launcher"

	// run executes one shell command on the target host.
	run := func(c context.Context, command string) error {
		return SSHRunCommand(c, sshUser, hostname, command)
	}

	fmt.Printf("%s: Stopping %s\n", hostname, tool)
	if err := run(ctx, "sudo systemctl stop "+tool); err != nil {
		return errors.Wrap(err, "Error stopping tool on node")
	}

	fmt.Printf("%s: Removing %s service file\n", hostname, tool)
	unitFile := path.Join("/etc/systemd/system", tool+".service")
	if err := run(ctx, "sudo rm "+unitFile); err != nil {
		return errors.Wrap(err, "Error removing systemd service file")
	}

	fmt.Printf("%s: Reloading systemd unit files\n", hostname)
	if err := run(ctx, "sudo systemctl daemon-reload"); err != nil {
		return errors.Wrap(err, "Error restarting systemctl on node")
	}

	fmt.Printf("%s: Removing %s certificates\n", hostname, tool)
	if err := run(ctx, "sudo rm "+path.Join(ciaoPKIDir, "CAcert.pem")); err != nil {
		return errors.Wrap(err, "Error removing CA certificate")
	}

	// A node carries either the compute agent or the network agent
	// certificate, so one of these removals can fail; errors from both are
	// deliberately ignored.
	for _, role := range []ssntp.Role{ssntp.AGENT, ssntp.NETAGENT} {
		certFile := path.Join(ciaoPKIDir, fmt.Sprintf("cert-%s-%s.pem", role.String(), hostname))
		_ = run(ctx, "sudo rm "+certFile)
	}

	if err := run(ctx, "sudo rmdir "+ciaoPKIDir); err != nil {
		return errors.Wrap(err, "Error removing ciao PKI directory")
	}

	// Need extra timeout here due to #343: the hard reset is bounded to 60
	// seconds, and running into that deadline is not treated as a failure.
	fmt.Printf("%s: Performing ciao-launcher hard reset\n", hostname)
	resetCtx, stop := context.WithTimeout(ctx, time.Second*60)
	resetErr := run(resetCtx, "sudo ciao-launcher --hard-reset")
	stop()
	if resetErr != nil && resetCtx.Err() != context.DeadlineExceeded {
		return errors.Wrap(resetErr, "Error doing hard-reset on ciao-launcher")
	}

	fmt.Printf("%s: Removing %s binary\n", hostname, tool)
	binary := path.Join("/usr/local/bin/", tool)
	if err := run(ctx, "sudo rm "+binary); err != nil {
		return errors.Wrap(err, "Error removing tool binary")
	}

	return nil
}

// TeardownNodes removes launcher from the given nodes.
//
// Each host is torn down in its own goroutine and every host is attempted
// even if others fail. A failure on a node is written to stderr as it
// happens. Once all nodes have finished, a non-nil error naming the failed
// hosts is returned if any teardown failed, so that callers can report an
// unsuccessful run; nil is returned only when every node succeeded.
func TeardownNodes(ctx context.Context, sshUser string, hosts []string) error {
	// One result slot per host: every goroutine writes only its own index,
	// so no additional locking is needed and the summary keeps input order.
	results := make([]error, len(hosts))

	var wg sync.WaitGroup
	for i, host := range hosts {
		wg.Add(1)
		go func(idx int, hostname string) {
			// Deferred so the wait below cannot hang if the teardown panics.
			defer wg.Done()

			err := teardownNode(ctx, hostname, sshUser)
			if err != nil {
				fmt.Fprintf(os.Stderr, "Error tearing down node: %s: %v\n", hostname, err)
			}
			results[idx] = err
		}(i, host)
	}
	wg.Wait()

	var failed []string
	for i, err := range results {
		if err != nil {
			failed = append(failed, hosts[i])
		}
	}
	if len(failed) > 0 {
		return fmt.Errorf("teardown failed on %d of %d nodes: %v", len(failed), len(hosts), failed)
	}
	return nil
}

0 comments on commit 15fb864

Please sign in to comment.