Skip to content

Commit

Permalink
Add new features to Rare CLI
Browse files Browse the repository at this point in the history
The script now lists all available pods in the namespace, with the
ability to select a pod. This should make scaling the environment
easier. There is also a new feature to destroy and re-create a pod, a
verbose argument, and a shell argument.

Old usage: ./rare.sh qlora
New usage: ./rare.sh shell qlora or ./rare.sh sh qlora
  • Loading branch information
Liana64 committed Nov 20, 2024
1 parent 1944240 commit 4bf6892
Showing 1 changed file with 127 additions and 39 deletions.
166 changes: 127 additions & 39 deletions scripts/rare.sh
Original file line number Diff line number Diff line change
@@ -1,40 +1,51 @@
#!/bin/bash

# TODO: Add shell arg with any specified pod name
RARE_CLUSTER=${RARE_CLUSTER:-"arc1"}
RARE_NAMESPACE=${RARE_NAMESPACE:-"machine-learning"}

# Print the usage text (commands and options) for this script to stdout.
# $0 is expanded so the text shows the name the script was invoked with.
show_help() {
  cat <<EOF
Usage: $0 <command> [options]
Usage: $0 <command> [podname] [options]

Commands:
 login Logs into teleport and sets up Kubernetes context
 logout Logs out from teleport
 qlora Executes into the qlora pod in the machine-learning namespace
 mmseqs2 Executes into the mmseqs2 pod in the machine-learning namespace
 shell, sh Shell into a specified pod
 list, ls List available pods in the namespace
 destroy, rm Destroy a specified pod and wait for automatic re-deployment

Options:
 -u, --user Specify the teleport user
 -n, --namespace Specify the Kubernetes namespace (default: machine-learning)
 -c, --cluster Specify the teleport cluster (default: RARE_CLUSTER environment variable)
 -n, --namespace Specify the Kubernetes namespace (default: RARE_NAMESPACE environment variable)
 -v, --verbose Show verbose output
 -h, --help Show this help message

EOF
}

# Default values
NAMESPACE="machine-learning"

# Parse arguments
#
# The command line is "<command> [podname] [options]". The pod name is
# optional, so the leading positional words must be told apart from option
# flags: a word that starts with "-" is never taken as the command or the pod
# name. This keeps "login --user foo" from losing "--user", lets a bare
# "list" work, and leaves a leading "-h" for the option parser.
#
# Arguments: the script's own "$@"
# Sets:      COMMAND, POD_NAME (empty when absent) and LEADING_ARGS, the
#            number of positional words that were consumed (0, 1 or 2).
parse_leading_args() {
  COMMAND=""
  POD_NAME=""
  LEADING_ARGS=0

  if [[ $# -ge 1 && "$1" != -* ]]; then
    COMMAND=$1
    LEADING_ARGS=1
    if [[ $# -ge 2 && "$2" != -* ]]; then
      POD_NAME=$2
      LEADING_ARGS=2
    fi
  fi
}

VERBOSE="false"
parse_leading_args "$@"
# Drop only what was actually consumed so the remaining words are options.
shift "$LEADING_ARGS"

while [[ $# -gt 0 ]]; do
case "$1" in
-u|--user)
USER="$2"
shift 2
;;
-c|--cluster)
TELEPORT_CLUSTER="$2"
shift 2
;;
-n|--namespace)
NAMESPACE="$2"
RARE_NAMESPACE="$2"
shift 2
;;
-v|--verbose)
VERBOSE="true"
shift
;;
-h|--help)
show_help
exit 0
Expand All @@ -47,6 +58,97 @@ while [[ $# -gt 0 ]]; do
esac
done

#######################################
# List the pods in the selected namespace.
# Prints one short name per workload (generated suffixes stripped, blank
# lines and duplicates removed). With VERBOSE=true it also prints selected
# fields of "kubectl describe" for every pod, using the full pod names.
# Globals:   RARE_NAMESPACE (read), VERBOSE (read)
# Arguments: none
# Outputs:   pod listing on stdout, errors on stderr
# Returns:   0 on success, 1 when the pod list cannot be fetched
#######################################
list_pods() {
  local raw_names pod_names pod

  echo "Available pods in namespace '$RARE_NAMESPACE':"

  # --no-headers suppresses the header row; with an empty column title that
  # row is a blank line, which the old 'grep -v "NAME"' never removed.
  if ! raw_names=$(tsh kubectl get pods -n "$RARE_NAMESPACE" --no-headers \
    -o custom-columns=":metadata.name"); then
    echo "Error: Unable to list pods in namespace '$RARE_NAMESPACE'." >&2
    return 1
  fi

  # Keep the first field of every non-blank line (drops any column padding).
  pod_names=$(printf '%s\n' "$raw_names" | awk 'NF { print $1 }')

  # Strip the trailing one or two generated name segments, then collapse
  # replicas of the same workload into a single entry (order preserved).
  printf '%s\n' "$pod_names" \
    | sed -E 's/-[a-z0-9]+(-[a-z0-9]+)?$//' \
    | awk 'NF && !seen[$0]++'

  # Add verbose output if the verbose flag is set
  if [[ "$VERBOSE" == "true" ]]; then
    echo ""
    echo "Detailed information for each pod:"
    echo "---------------------------------"
    # Reuse the names fetched above instead of querying the cluster again.
    while IFS= read -r pod; do
      [[ -n "$pod" ]] || continue
      echo "Pod: $pod"
      tsh kubectl describe pod "$pod" -n "$RARE_NAMESPACE" \
        | grep -E "Name:|Namespace:|Status:|Node:|Containers:|Image:"
      echo "---------------------------------"
    done <<<"$pod_names"
  fi
}

#######################################
# Open an interactive bash shell in the first container of a pod.
# When POD_NAME is empty the available pods are listed and the user is
# prompted for a name. POD_NAME is matched as a literal substring of the pod
# names in the namespace; if several pods match, the first one is used.
# Globals:   POD_NAME (read, written by the prompt), RARE_NAMESPACE (read),
#            POD_TARGET (written), MAIN_CONTAINER (written)
# Arguments: none
# Outputs:   progress on stdout, errors on stderr
# Returns:   exits the script with status 1 when no name is given, no pod
#            matches, or the container name cannot be determined
#######################################
shell_into_pod() {
  if [[ -z "$POD_NAME" ]]; then
    echo "No pod specified. Listing available pods..."
    list_pods
    echo ""
    read -r -p "Enter the pod name to shell into: " POD_NAME
  fi

  if [[ -z "$POD_NAME" ]]; then
    echo "Error: No pod name provided." >&2
    exit 1
  fi

  # "-o name" yields "pod/<name>"; match literally on the name part and stop
  # at the first hit so POD_TARGET is always a single pod name.
  POD_TARGET=$(tsh kubectl get pods -n "$RARE_NAMESPACE" -o name \
    | awk -F '/' -v want="$POD_NAME" 'index($2, want) { print $2; exit }')

  if [[ -z "$POD_TARGET" ]]; then
    echo "Error: No pod matching '$POD_NAME' found in namespace $RARE_NAMESPACE." >&2
    exit 1
  fi

  MAIN_CONTAINER=$(tsh kubectl get pod "$POD_TARGET" -n "$RARE_NAMESPACE" \
    -o jsonpath="{.spec.containers[0].name}")

  if [[ -z "$MAIN_CONTAINER" ]]; then
    echo "Error: Unable to determine the main container for pod $POD_NAME." >&2
    exit 1
  fi

  echo "Shelling into pod $POD_NAME..."
  tsh kubectl exec -it "$POD_TARGET" -n "$RARE_NAMESPACE" -c "$MAIN_CONTAINER" -- /bin/bash
}

#######################################
# Delete a pod and wait for its replacement to reach the Running phase.
# When POD_NAME is empty the available pods are listed and the user is
# prompted for a name. POD_NAME is matched as a literal substring of the pod
# names; the function refuses to delete anything unless exactly one pod
# matches. The replacement is recognised by a different UID, so it is found
# even when it reuses the deleted pod's name.
# Globals:   POD_NAME (read, written by the prompt), RARE_NAMESPACE (read),
#            VERBOSE (read), RARE_DESTROY_TIMEOUT (read, seconds, default 60),
#            POD_TARGET (written), NEW_POD (written, "pod/<name>")
# Arguments: none
# Outputs:   progress on stdout, errors on stderr
# Returns:   exits the script with status 1 on a missing/ambiguous name, a
#            failed delete, or a timeout
#######################################
destroy_pod() {
  local matches match_count old_uid start_time elapsed timeout

  if [[ -z "$POD_NAME" ]]; then
    echo "No pod specified. Listing available pods..."
    list_pods
    echo ""
    read -r -p "Enter the pod name to destroy: " POD_NAME
  fi

  if [[ -z "$POD_NAME" ]]; then
    echo "Error: No pod name provided." >&2
    exit 1
  fi

  # "-o name" yields "pod/<name>"; match literally on the name part.
  matches=$(tsh kubectl get pods -n "$RARE_NAMESPACE" -o name \
    | awk -F '/' -v want="$POD_NAME" 'index($2, want) { print $2 }')

  if [[ -z "$matches" ]]; then
    echo "Error: No pod matching '$POD_NAME' found in namespace $RARE_NAMESPACE." >&2
    exit 1
  fi

  match_count=$(printf '%s\n' "$matches" | grep -c .)
  if [[ "$match_count" -gt 1 ]]; then
    # Deleting is destructive: never guess which pod was meant.
    echo "Error: '$POD_NAME' matches $match_count pods; use a more specific name:" >&2
    printf '  %s\n' $matches >&2
    exit 1
  fi
  POD_TARGET=$matches

  # Remember the UID so the replacement can be told apart from the old pod.
  old_uid=$(tsh kubectl get pod "$POD_TARGET" -n "$RARE_NAMESPACE" \
    -o jsonpath='{.metadata.uid}')

  echo "Destroying pod $POD_NAME..."
  if ! tsh kubectl delete pod "$POD_TARGET" -n "$RARE_NAMESPACE"; then
    echo "Error: Failed to delete pod $POD_TARGET." >&2
    exit 1
  fi

  echo "Waiting for pod $POD_NAME to be re-deployed..."
  timeout=${RARE_DESTROY_TIMEOUT:-60}
  [[ "$timeout" =~ ^[0-9]+$ ]] || timeout=60
  start_time=$(date +%s)

  while true; do
    # A replacement is a matching pod in phase Running whose UID differs
    # from the pod that was just deleted.
    NEW_POD=$(tsh kubectl get pods -n "$RARE_NAMESPACE" --no-headers \
      -o custom-columns=NAME:.metadata.name,UID:.metadata.uid,PHASE:.status.phase \
      | awk -v want="$POD_NAME" -v old="$old_uid" \
        'index($1, want) && $2 != old && $3 == "Running" { print "pod/" $1; exit }')
    elapsed=$(($(date +%s) - start_time))

    if [[ -n "$NEW_POD" ]]; then
      echo "Pod $POD_NAME ready in $elapsed seconds"
      if [[ "$VERBOSE" == "true" ]]; then
        echo "The new pod is identified by $NEW_POD"
      fi
      break
    fi

    # Check for timeout
    if [[ "$elapsed" -ge "$timeout" ]]; then
      echo "Error: Timeout reached while waiting for pod $POD_NAME to be re-deployed." >&2
      exit 1
    fi

    sleep 5 # Wait 5 seconds before checking again
  done
}

if [[ "$COMMAND" == "login" ]]; then
if [[ -z "$USER" ]]; then
echo "Error: --user is required for login"
Expand All @@ -62,50 +164,36 @@ if [[ "$COMMAND" == "login" ]]; then
echo " "
echo "Logging into teleport..."
echo ""
tsh login --proxy=teleport.rarecompute.io:443 --auth=local --user="$USER" teleport.rarecompute.io
tsh login --proxy="teleport.rarecompute.io:443" --auth=local --user="$USER" teleport.rarecompute.io

echo "Setting KUBECONFIG..."
export KUBECONFIG=~/teleport-kubeconfig.yaml
echo "Logging into Kubernetes cluster..."
tsh kube login arc1
tsh kube login $RARE_CLUSTER

echo "Setting namespace to $NAMESPACE..."
kubectl config set-context "$(kubectl config current-context)" --namespace="$NAMESPACE"
echo "Setting namespace to $RARE_NAMESPACE..."
kubectl config set-context "$(kubectl config current-context)" --namespace="$RARE_NAMESPACE"

echo "Login complete! If you run into any issues after this command, try running:"
echo ""
echo "tsh kube login arc1"
echo "Login complete! If you run into any issues, try running:"
echo ""
echo "tsh kube login $RARE_CLUSTER"
echo ""
echo "-------------------------------------------------------------------"
echo "-------------------------------------------------------------------------------"
echo ""
echo "Run the following commands in your shell to finalize:"
echo "Run the below command to finalize:"
echo ""
echo "export KUBECONFIG=~/teleport-kubeconfig.yaml"
echo ""
elif [[ "$COMMAND" == "logout" ]]; then
echo "Logging out of teleport..."
tsh logout
echo "Logout complete!"
elif [[ "$COMMAND" == "qlora" ]]; then
echo "Finding qlora pod..."
POD_NAME=$(tsh kubectl get pods -n "$NAMESPACE" -o name | grep qlora | awk -F '/' '{print $2}')
if [[ -z "$POD_NAME" ]]; then
echo "Error: No qlora pod found in namespace $NAMESPACE"
exit 1
fi

echo "Executing into pod $POD_NAME..."
tsh kubectl exec -it "$POD_NAME" -n "$NAMESPACE" -- /bin/bash
elif [[ "$COMMAND" == "mmseqs2" ]]; then
echo "Finding mmseqs2 pod..."
POD_NAME=$(tsh kubectl get pods -n "$NAMESPACE" -o name | grep mmseqs2 | awk -F '/' '{print $2}')
if [[ -z "$POD_NAME" ]]; then
echo "Error: No mmseqs2 pod found in namespace $NAMESPACE"
exit 1
fi

echo "Executing into pod $POD_NAME..."
tsh kubectl exec -it "$POD_NAME" -n "$NAMESPACE" -- /bin/bash
elif [[ "$COMMAND" == "shell" || "$COMMAND" == "sh" ]]; then
shell_into_pod
elif [[ "$COMMAND" == "list" || "$COMMAND" == "ls" ]]; then
list_pods
elif [[ "$COMMAND" == "destroy" || "$COMMAND" == "rm" ]]; then
destroy_pod
else
echo "Unknown command: $COMMAND"
show_help
Expand Down

0 comments on commit 4bf6892

Please sign in to comment.