From a07cf016af68bdc989398ba1483110a5ac2a0df8 Mon Sep 17 00:00:00 2001 From: Andrey Arapov <107317698+andy108369@users.noreply.github.com> Date: Tue, 7 Nov 2023 13:54:09 +0100 Subject: [PATCH] chore(docs): various docs updates (#282) --- .../prepare-kubernetes-cluster.md | 2 +- operator/provider/README.md | 6 +----- .../gpu-node-label.md | 2 +- .../gpu-provider-configuration.md | 9 +-------- .../gpu-provider-troubleshooting.md | 2 +- .../README.md | 2 +- ...ep-1-prerequisites-of-an-akash-provider.md | 2 +- .../step-11-disable-unattended-upgrades.md | 3 +++ .../step-2-kubernetes-configurations.md | 2 +- .../akash-provider-attribute-updates.md | 4 ++-- ...step-6-verify-current-provider-settings.md | 2 +- .../deploy-persistent-storage.md | 2 +- ...ider-attributes-and-pricing-adjustments.md | 2 +- .../README.md | 4 ++-- .../step-1-clone-the-kubespray-project.md | 20 +++++++++---------- ...-3-ansible-access-to-kubernetes-cluster.md | 4 ++-- .../step-4-ansible-inventory.md | 1 - .../step-5-enable-gvisor.md | 2 ++ .../create-kubernetes-cluster.md | 7 ++----- .../disable-search-domains.md | 2 +- .../nvidia-runtime-configuration.md | 6 +----- 21 files changed, 36 insertions(+), 50 deletions(-) diff --git a/akash-nodes/akash-node-via-helm-charts/prepare-kubernetes-cluster.md b/akash-nodes/akash-node-via-helm-charts/prepare-kubernetes-cluster.md index 8551911c..b570cf3a 100644 --- a/akash-nodes/akash-node-via-helm-charts/prepare-kubernetes-cluster.md +++ b/akash-nodes/akash-node-via-helm-charts/prepare-kubernetes-cluster.md @@ -16,7 +16,7 @@ kubectl label ns akash-services akash.network/name=akash-services akash.network= ## Install Helm * Install Helm for Kubernetes package management if not done so prior -* Execute on these steps on a Kubernetes master node +* Execute on these steps on a Kubernetes control plane node ``` wget https://get.helm.sh/helm-v3.11.0-linux-amd64.tar.gz diff --git a/operator/provider/README.md b/operator/provider/README.md index 7b2cc02e..a3a25e10 100644 --- a/operator/provider/README.md +++ b/operator/provider/README.md @@ -32,7 +32,7 @@ At ths point you would be left with a Kubernetes cluster that is ready to be a p The recommended method for setting up a Kubernetes cluster is to use the [Kubespray](https://github.com/kubernetes-sigs/kubespray) project. This project is a collection of ansible resources for setting up a Kubernetes cluster. -The recommended minimum number of machines is three. One machine hosts the Kubernetes master node & provider, with the other machines hosting the compute nodes. It is possible however to provision a single-machine cluster if you choose to, but this configuration is not recommended. +The recommended minimum number of machines is three. One machine hosts the Kubernetes control plane node & provider, with the other machines hosting the compute nodes. It is possible however to provision a single-machine cluster if you choose to, but this configuration is not recommended. ### Getting kubespray & setup @@ -67,7 +67,6 @@ Example single node configuration \(not recommended\) ```text all: vars: - cluster_id: "1.0.0.1" ansible_user: root hosts: mynode: @@ -97,14 +96,11 @@ This Ansible inventory file defines a single node file with a host named "mynode The host is placed into the groups `kube-master`, `etcd`, `kube-node`, and `calico-rr`. All hosts in those groups are then placed into the `k8s-cluster` group. This is similar to a standard configuration for a Kubernetes cluster, but utilizes Calico for networking. 
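The grouping described above can be sanity-checked before any playbook is run. The following is a minimal sketch, assuming Ansible is installed locally and the inventory shown above has been saved as `inventory.yaml` (the filename is illustrative):

```
# Show how hosts roll up into groups; every host should appear under
# kube-master, etcd, kube-node and calico-rr, and all of them under k8s-cluster.
ansible-inventory -i inventory.yaml --graph

# Dump the parsed inventory as JSON to confirm vars such as ansible_user
# were picked up as intended.
ansible-inventory -i inventory.yaml --list
```

If the graph does not show the expected group membership, fix the inventory before running `cluster.yml`.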
Calico is the only networking solution for the Kubernetes cluster that Akash officially supports at this time. -One important detail is the value `cluster_id` which is assigned to all nodes by using the `all` group under `vars` in the YAML file. This value is used by Calico to uniquely identify a set of resources. For a more in depth explanation [see this document](https://hub.docker.com/r/calico/routereflector/). - Example multinode configuration, with a single master ```text all: vars: - cluster_id: "1.0.0.1" ansible_user: root hosts: mymaster: diff --git a/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-node-label.md b/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-node-label.md index 59075fd8..8879d7f3 100644 --- a/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-node-label.md +++ b/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-node-label.md @@ -4,7 +4,7 @@ Each node that provides GPUs must be labeled correctly. -> _**NOTE**_ - these configurations should be completed on a Kubernetes master/control plane node +> _**NOTE**_ - these configurations should be completed on a Kubernetes control plane node ## Label Template diff --git a/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-provider-configuration.md b/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-provider-configuration.md index 08b98339..86aaeb8a 100644 --- a/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-provider-configuration.md +++ b/other-resources/experimental/build-a-cloud-provider/gpu-resource-enablement-optional-step/gpu-provider-configuration.md @@ -102,8 +102,8 @@ Update the nvidia-container-runtime config in order to prevent `NVIDIA_VISIBLE_D Make sure the config file `/etc/nvidia-container-runtime/config.toml` contains these line uncommmented and set to these values: ``` -accept-nvidia-visible-devices-envvar-when-unprivileged = false accept-nvidia-visible-devices-as-volume-mounts = true +accept-nvidia-visible-devices-envvar-when-unprivileged = false ``` > _**NOTE**_ - `/etc/nvidia-container-runtime/config.toml` is part of `nvidia-container-toolkit-base` package; so it won't override the customer-set parameters there since it is part of the `/var/lib/dpkg/info/nvidia-container-toolkit-base.conffiles` @@ -120,10 +120,6 @@ In this step we add the NVIDIA runtime confguration into the Kubespray inventory ``` cat > ~/kubespray/inventory/akash/group_vars/all/akash.yml <<'EOF' -ansible_user: root - -ansible_connection: ssh - containerd_additional_runtimes: - name: nvidia type: "io.containerd.runc.v2" @@ -139,9 +135,6 @@ EOF ``` cd ~/kubespray -###Execute following command if not already in the Python virtual environment -###Creation and activation of virtual evironment described further here: -###https://docs.akash.network/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-2-install-ansible source venv/bin/activate ansible-playbook -i inventory/akash/hosts.yaml -b -v --private-key=~/.ssh/id_rsa cluster.yml diff --git a/providers/akash-provider-troubleshooting/gpu-provider-troubleshooting.md b/providers/akash-provider-troubleshooting/gpu-provider-troubleshooting.md index b07c8fdd..eafb7e6a 100644 --- a/providers/akash-provider-troubleshooting/gpu-provider-troubleshooting.md +++ 
b/providers/akash-provider-troubleshooting/gpu-provider-troubleshooting.md @@ -97,7 +97,7 @@ dmesg -T | grep -Ei 'nvidia|nvml|cuda|mismatch' ## Ensure Correct Version/Presence of NVIDIA Device Plugin -> _**NOTE**_ - conduct this verification step on the Kubernetes master node on which Helm was installed during your Akash Provider build +> _**NOTE**_ - conduct this verification step on the Kubernetes control plane node on which Helm was installed during your Akash Provider build ``` helm -n nvidia-device-plugin list diff --git a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/README.md b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/README.md index 8baedab8..0a34c03c 100644 --- a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/README.md +++ b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/README.md @@ -4,7 +4,7 @@ An Akash Provider leases compute to users launching new deployments. Follow the steps in this guide to build your own provider. -This guide uses a single Kubernetes master node. +This guide uses a single Kubernetes control plane node. ## Overview and links to the steps involved in Akash Provider Build: diff --git a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-1-prerequisites-of-an-akash-provider.md b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-1-prerequisites-of-an-akash-provider.md index f4d2dffb..ddc4228e 100644 --- a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-1-prerequisites-of-an-akash-provider.md +++ b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-1-prerequisites-of-an-akash-provider.md @@ -56,7 +56,7 @@ In this section we perform the following DNS adjustments: > _**NOTE**_ - the DNS resolution issue & the Netplan fix addressed in this step are described [here](https://github.com/akash-network/support/issues/80) -Apply the following to all Kubernetes master and worker nodes. +Apply the following to all Kubernetes control plane and worker nodes. > _**IMPORTANT**_ - Make sure you do not have any other config files under the `/etc/netplan` directory, otherwise it could cause unexpected networking issues / issues with booting up your node. 
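For orientation, the Netplan adjustment referenced above removes DHCP-supplied DNS search domains from each node. The snippet below is only an illustrative sketch of such a file — the interface name, renderer, and resolver addresses are placeholder assumptions, and the authoritative contents are the ones given in the guide itself:

```
# /etc/netplan/01-netcfg.yaml -- illustrative sketch only; interface name
# and resolver addresses are placeholders, not the guide's exact values.
network:
  version: 2
  renderer: networkd
  ethernets:
    enp1s0:
      dhcp4: true
      dhcp4-overrides:
        use-domains: false      # do not accept DNS search domains from DHCP
      nameservers:
        addresses: [8.8.8.8, 1.1.1.1]
```

Validate with `netplan try` (it rolls back automatically if connectivity is lost) before committing the change with `netplan apply`.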
diff --git a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-11-disable-unattended-upgrades.md b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-11-disable-unattended-upgrades.md index 00605860..4a86d671 100644 --- a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-11-disable-unattended-upgrades.md +++ b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-11-disable-unattended-upgrades.md @@ -16,6 +16,9 @@ To disable unattended upgrades, execute these two commands on your Kubernetes wo echo -en 'APT::Periodic::Update-Package-Lists "0";\nAPT::Periodic::Unattended-Upgrade "0";\n' | tee /etc/apt/apt.conf.d/20auto-upgrades apt remove unattended-upgrades + +systemctl stop unattended-upgrades.service +systemctl mask unattended-upgrades.service ``` ## Verify diff --git a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-2-kubernetes-configurations.md b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-2-kubernetes-configurations.md index 65bfdc26..00c95f75 100644 --- a/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-2-kubernetes-configurations.md +++ b/providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-2-kubernetes-configurations.md @@ -2,7 +2,7 @@ Create Provider namespaces on your Kubernetes cluster. -Run these commands from a Kubernetes master node which has kubectl access to cluster. +Run these commands from a Kubernetes control plane node which has kubectl access to cluster. ``` kubectl create ns akash-services diff --git a/providers/build-a-cloud-provider/akash-provider-attribute-updates.md b/providers/build-a-cloud-provider/akash-provider-attribute-updates.md index a1330660..88cd2c26 100644 --- a/providers/build-a-cloud-provider/akash-provider-attribute-updates.md +++ b/providers/build-a-cloud-provider/akash-provider-attribute-updates.md @@ -2,9 +2,9 @@ ## Initial Guidance and Assumptions -* Conduct all steps in this guide from a Kubernetes master node in your Akash Provider cluster. +* Conduct all steps in this guide from a Kubernetes control plane node in your Akash Provider cluster. * Guide assumes that your Akash Provider was installed via Helm Charts as detailed in this [guide](../../providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/). -* Guide assumes that the Kubernetes master node used has Helm installed. Refer to this [guide](../../providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-4-helm-installation-on-kubernetes-node.md) step if a Helm install is needed. Return to this guide once Helm install is completed. +* Guide assumes that the Kubernetes control plane node used has Helm installed. Refer to this [guide](../../providers/build-a-cloud-provider/akash-cloud-provider-build-with-helm-charts/step-4-helm-installation-on-kubernetes-node.md) step if a Helm install is needed. Return to this guide once Helm install is completed. 
## Caveats diff --git a/providers/build-a-cloud-provider/akash-provider-checkup/step-6-verify-current-provider-settings.md b/providers/build-a-cloud-provider/akash-provider-checkup/step-6-verify-current-provider-settings.md index fe121645..a2e2c93d 100644 --- a/providers/build-a-cloud-provider/akash-provider-checkup/step-6-verify-current-provider-settings.md +++ b/providers/build-a-cloud-provider/akash-provider-checkup/step-6-verify-current-provider-settings.md @@ -6,7 +6,7 @@ Use the steps covered in this section to verify the current settings of your run > Steps in this section assume the provider was installed via Akash Provider Helm Charts. > -> Conduct the steps from a Kubernetes master node with `kubectl` access to the cluster. +> Conduct the steps from a Kubernetes control plane node with `kubectl` access to the cluster. ## View Provider Current Settings diff --git a/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/deploy-persistent-storage.md b/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/deploy-persistent-storage.md index a6eeb525..a33866ca 100644 --- a/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/deploy-persistent-storage.md +++ b/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/deploy-persistent-storage.md @@ -4,7 +4,7 @@ Install Helm and add the Akash repo if not done previously by following the steps in this [guide](../akash-cloud-provider-build-with-helm-charts/step-4-helm-installation-on-kubernetes-node.md)**.** -All steps in this section should be conducted from the Kubernetes master node on which Helm has been installed. +All steps in this section should be conducted from the Kubernetes control plane node on which Helm has been installed. Rook has published the following Helm charts for the Ceph storage provider: diff --git a/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/provider-attributes-and-pricing-adjustments.md b/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/provider-attributes-and-pricing-adjustments.md index eb31958d..e22ef39f 100644 --- a/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/provider-attributes-and-pricing-adjustments.md +++ b/providers/build-a-cloud-provider/helm-based-provider-persistent-storage-enablement/provider-attributes-and-pricing-adjustments.md @@ -2,7 +2,7 @@ ## Attribute Adjustments -* Conduct the steps in this section on the Kubernetes master from which the provider was configured in prior steps +* Conduct the steps in this section on the Kubernetes control plane from which the provider was configured in prior steps * Adjust the following key-values pairs as necessary within the `provider-storage.yaml` file created below: * Update the values of the `capabilities/storage/2/class` key to the correct class type (I.e. `beta2`). Reference the [Storage Class Types](storage-class-types.md) doc section for additional details. 
* Update the region value from current `us-west` to an appropriate value such as `us-east` OR `eu-west` diff --git a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/README.md b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/README.md index c52045a7..38f83982 100644 --- a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/README.md +++ b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/README.md @@ -2,7 +2,7 @@ ## **Overview** -Akash leases are deployed via Kubernetes pods on provider clusters. This guide details the build of the provider’s Kubernetes control plane and worker nodes. +Akash leases are deployed as Kubernetes pods on provider clusters. This guide details the build of the provider’s Kubernetes control plane and worker nodes. The setup of a Kubernetes cluster is the responsibility of the provider. This guide provides best practices and recommendations for setting up a Kubernetes cluster. This document is not a comprehensive guide and assumes pre-existing Kubernetes knowledge. @@ -15,7 +15,7 @@ The Kubernetes instructions in this guide are intended for audiences that have t * **Server Administration Skills** - necessary for setting up servers/network making up the Kubernetes cluster * **Kubernetes Experience** - a base level of Kubernetes administration is highly recommended -Please consider using the [Praetor](../../community-solutions/praetor.md) application to build an Akash Provider for small and medium sized environments which require little customization. +> Please consider using the [Praetor](../../community-solutions/praetor.md) application to build an Akash Provider for small and medium sized environments which require little customization. ## Guide Sections diff --git a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-1-clone-the-kubespray-project.md b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-1-clone-the-kubespray-project.md index 3c0e9d65..dd1d4b17 100644 --- a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-1-clone-the-kubespray-project.md +++ b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-1-clone-the-kubespray-project.md @@ -6,26 +6,26 @@ We recommend using the Kubespray project to deploy a cluster. Kubespray uses Ans The recommended minimum number of hosts is four for a production Provider Kubernetes cluster. This is meant to allow: -* Three hosts serving as a redundant control plane/master instances +* Three hosts serving as a redundant control plane (aka master)/etcd instances * One host to serve as Kubernetes worker node to host provider leases. ### Additional Cluster Sizing Considerations -> While a production Kubernetes cluster would typically require three redundant control plane nodes, in circumstances in which the control plane node is easily recoverable the use of a single control instance for Akash providers should suffice. +* While a production Kubernetes cluster would typically require three redundant control plane nodes, in circumstances in which the control plane node is easily recoverable the use of a single control instance for Akash providers should suffice. -> The number of control plane nodes in the cluster should always be an odd number to allow the cluster to reach consensus. +* The number of control plane nodes in the cluster should always be an odd number to allow the cluster to reach consensus. 
-> We recommend running a single worker node per physical server as CPU is typically the largest resource bottleneck. The use of a single worker node allows larger workloads to be deployed on your provider.
+* We recommend running a single worker node per physical server as CPU is typically the largest resource bottleneck. The use of a single worker node allows larger workloads to be deployed on your provider.

-> If you intended to build a provider with persistent storage please refer to host requirements detailed [here](../helm-based-provider-persistent-storage-enablement/persistent-storage-requirements.md).
+* If you intend to build a provider with persistent storage please refer to host storage requirements detailed [here](../helm-based-provider-persistent-storage-enablement/persistent-storage-requirements.md).

 ## Kubernetes Cluster Software/Hardware Requirements and Recommendations

 ### Software Recommendation

-Akash Providers have been tested on Ubuntu 22.04 with the default Linux kernel. Your experience may vary should install be attempted using a different Linux distro/kernel.
+Akash Providers have been tested on **Ubuntu 22.04** with the default Linux kernel. Your experience may vary should the install be attempted using a different Linux distro/kernel.

-### Kubernetes Master Node Requirements
+### Kubernetes Control Plane Node Requirements

 * Minimum Specs
   * 2 CPU
@@ -36,7 +36,7 @@ Akash Providers have been tested on Ubuntu 22.04 with the default Linux kernel.
   * 8 GB RAM
   * 40 GB disk

-### Kubernetes Work Node Requirements
+### Kubernetes Worker Node Requirements

 * Minimum Specs
   * 4 CPU
@@ -48,7 +48,7 @@ Akash Providers have been tested on Ubuntu 22.04 with the default Linux kernel.

 ## **etcd Hardware Recommendations**

-* Use this [guide](https://etcd.io/docs/v3.3/op-guide/hardware) to ensure Kubernetes control plane nodes meet the recommendations for hosting a `etcd` database.
+* Use this [guide](https://etcd.io/docs/v3.5/op-guide/hardware) to ensure Kubernetes control plane nodes meet the recommendations for hosting an `etcd` database.
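Beyond sizing, etcd is very sensitive to disk latency. A commonly used fio check is sketched below, assuming `fio` is installed and `/var/lib/etcd-bench` is a scratch directory on the disk that will back etcd (both are placeholders); for a healthy etcd disk the reported 99th-percentile fdatasync latency should stay in the single-digit millisecond range:

```
mkdir -p /var/lib/etcd-bench

# Simulate etcd's small, fdatasync-heavy write pattern and report sync latencies.
fio --name=etcd-disk-check \
    --directory=/var/lib/etcd-bench \
    --rw=write --ioengine=sync --fdatasync=1 \
    --size=22m --bs=2300

rm -rf /var/lib/etcd-bench
```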
## **Kubespray Clone** @@ -65,7 +65,7 @@ Obtain Kubespray and navigate into the created local directory: ``` cd ~ -git clone -b v2.23.0 --depth=1 https://github.com/kubernetes-sigs/kubespray.git +git clone -b v2.23.1 --depth=1 https://github.com/kubernetes-sigs/kubespray.git cd kubespray ``` diff --git a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-3-ansible-access-to-kubernetes-cluster.md b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-3-ansible-access-to-kubernetes-cluster.md index de584a9d..b7e93617 100644 --- a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-3-ansible-access-to-kubernetes-cluster.md +++ b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-3-ansible-access-to-kubernetes-cluster.md @@ -43,7 +43,7 @@ ssh-copy-id -i ~/.ssh/id_rsa.pub @ ### **Example** -* Conduct this step for every Kubernetes master and worker node in the cluster +* Conduct this step for every Kubernetes control plane and worker node in the cluster ``` ssh-copy-id -i ~/.ssh/id_rsa.pub root@10.88.94.5 @@ -63,7 +63,7 @@ ssh -i ~/.ssh/id_rsa @ ### **Example** -* Conduct this access test for every Kubernetes master and worker node in the cluster +* Conduct this access test for every Kubernetes control plane and worker node in the cluster ``` ssh -i ~/.ssh/id_rsa root@10.88.94.5 diff --git a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-4-ansible-inventory.md b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-4-ansible-inventory.md index a749819e..76790e9f 100644 --- a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-4-ansible-inventory.md +++ b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-4-ansible-inventory.md @@ -118,7 +118,6 @@ vi ~/kubespray/inventory/akash/hosts.yaml ``` * Within the YAML file’s “all” stanza and prior to the “hosts” sub-stanza level - insert the following vars stanza -* We currently recommend disabling TCP offloading on vxlan.calico interface until calico fixes a related bug. This only applies when Calico is configured to use VXLAN encapsulation. Read more about this bug [here](https://github.com/kubernetes-sigs/kubespray/pull/9261#issuecomment-1248844913). ``` vars: diff --git a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-5-enable-gvisor.md b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-5-enable-gvisor.md index e8e1967d..0a15e149 100644 --- a/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-5-enable-gvisor.md +++ b/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-5-enable-gvisor.md @@ -24,4 +24,6 @@ container_manager: containerd ## **gVisor Issue - No system-cgroup v2 Support** +> Skip if you are not using gVisor + If you are using a newer systemd version, your container will get stuck in ContainerCreating state on your provider with gVisor enabled. Please reference [this document](../gvisor-issue-no-system-cgroup-v2-support.md) for details regarding this issue and the recommended workaround. 
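With gVisor enabled and the playbook re-run, a quick smoke test confirms the runtime is actually reachable from Kubernetes. The sketch below assumes a RuntimeClass named `gvisor` backed by the `runsc` handler (it applies one in case Kubespray did not create it); the pod name and image are illustrative:

```
# Create (or confirm) a RuntimeClass for runsc and start a throwaway pod on it.
cat <<'EOF' | kubectl apply -f -
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc
---
apiVersion: v1
kind: Pod
metadata:
  name: gvisor-smoke-test
  namespace: default
spec:
  runtimeClassName: gvisor
  containers:
    - name: test
      image: busybox:1.36
      command: ["sleep", "600"]
EOF

# Inside a gVisor sandbox, dmesg prints gVisor's own kernel banner.
kubectl -n default wait --for=condition=Ready pod/gvisor-smoke-test --timeout=120s
kubectl -n default exec gvisor-smoke-test -- dmesg | head -n 3
kubectl -n default delete pod gvisor-smoke-test
```

If the output shows the host kernel log instead of gVisor's banner, the pod was not sandboxed and the containerd/runsc wiring needs another look.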
diff --git a/providers/build-a-cloud-provider/provider-build-with-gpu/create-kubernetes-cluster.md b/providers/build-a-cloud-provider/provider-build-with-gpu/create-kubernetes-cluster.md index 6ee26524..fb335bfe 100644 --- a/providers/build-a-cloud-provider/provider-build-with-gpu/create-kubernetes-cluster.md +++ b/providers/build-a-cloud-provider/provider-build-with-gpu/create-kubernetes-cluster.md @@ -13,9 +13,6 @@ With inventory in place we are ready to build the Kubernetes cluster via Ansible ``` cd ~/kubespray -###Execute following command if not already in the Python virtual environment -###Creation and activation of virtual evironment described further here: -###https://docs.akash.network/providers/build-a-cloud-provider/kubernetes-cluster-for-akash-providers/step-2-install-ansible source venv/bin/activate ansible-playbook -i inventory/akash/hosts.yaml -b -v --private-key=~/.ssh/id_rsa cluster.yml @@ -25,7 +22,7 @@ ansible-playbook -i inventory/akash/hosts.yaml -b -v --private-key=~/.ssh/id_rsa Each node that provides GPUs must be labeled correctly. -> _**NOTE**_ - these configurations should be completed on a Kubernetes master/control plane node +> _**NOTE**_ - these configurations should be completed on a Kubernetes control plane node ### Label Template @@ -71,7 +68,7 @@ Labels: akash.network/capabilities.gpu.vendor.nvidia.model.a4000=tru ## Additional Kubernetes Configurations -> _**NOTE**_ - these configurations should be completed on a Kubernetes master/control plane node +> _**NOTE**_ - these configurations should be completed on a Kubernetes control plane node ``` kubectl create ns akash-services diff --git a/providers/build-a-cloud-provider/provider-build-with-gpu/disable-search-domains.md b/providers/build-a-cloud-provider/provider-build-with-gpu/disable-search-domains.md index 67028bb3..0a5e4c1f 100644 --- a/providers/build-a-cloud-provider/provider-build-with-gpu/disable-search-domains.md +++ b/providers/build-a-cloud-provider/provider-build-with-gpu/disable-search-domains.md @@ -18,7 +18,7 @@ In this section we perform the following DNS adjustments: > _**NOTE**_ - the DNS resolution issue & the Netplan fix addressed in this step are described [here](https://github.com/akash-network/support/issues/80) -Apply the following to all Kubernetes master and worker nodes. +Apply the following to all Kubernetes control plane and worker nodes. > _**IMPORTANT**_ - Make sure you do not have any other config files under the `/etc/netplan` directory, otherwise it could cause unexpected networking issues / issues with booting up your node. 
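After the Netplan change has been applied (and `netplan apply` run or the node rebooted), it is worth confirming that no search domains remain active before moving on to the NVIDIA runtime configuration. A minimal check, assuming Ubuntu 22.04 with systemd-resolved (the default), might look like:

```
# Per-link search domains as seen by systemd-resolved; the listed domains should be blank.
resolvectl domain

# The generated resolver config should carry no "search" entries either.
grep -i '^search' /etc/resolv.conf || echo "no search domains configured"
```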
diff --git a/providers/build-a-cloud-provider/provider-build-with-gpu/nvidia-runtime-configuration.md b/providers/build-a-cloud-provider/provider-build-with-gpu/nvidia-runtime-configuration.md index 8ffb10cc..b91f43ec 100644 --- a/providers/build-a-cloud-provider/provider-build-with-gpu/nvidia-runtime-configuration.md +++ b/providers/build-a-cloud-provider/provider-build-with-gpu/nvidia-runtime-configuration.md @@ -11,8 +11,8 @@ Update the nvidia-container-runtime config in order to prevent `NVIDIA_VISIBLE_D Make sure the config file `/etc/nvidia-container-runtime/config.toml` contains these line uncommmented and set to these values: ``` -accept-nvidia-visible-devices-envvar-when-unprivileged = false accept-nvidia-visible-devices-as-volume-mounts = true +accept-nvidia-visible-devices-envvar-when-unprivileged = false ``` > _**NOTE**_ - `/etc/nvidia-container-runtime/config.toml` is part of `nvidia-container-toolkit-base` package; so it won't override the customer-set parameters there since it is part of the `/var/lib/dpkg/info/nvidia-container-toolkit-base.conffiles` @@ -25,10 +25,6 @@ In this step we add the NVIDIA runtime confguration into the Kubespray inventory ``` cat > ~/kubespray/inventory/akash/group_vars/all/akash.yml <<'EOF' -ansible_user: root - -ansible_connection: ssh - containerd_additional_runtimes: - name: nvidia type: "io.containerd.runc.v2"