Skip to content

Commit

Permalink
scylla-ansible-roles: Adds example playbook "kernel_version_enforcer"
Browse files Browse the repository at this point in the history
"kernel_version_enforcer" playbook allows the user to:
- Pin a specific kernel version (and ensure it will be picked in the next reboot) if required
- Upgrade kernel version to the latest available
- Purge all old kernel versions
- Upgrade all upgradable packages

Signed-off-by: Eduardo Benzecri <[email protected]>
  • Loading branch information
ebenzecri committed Aug 15, 2024
1 parent 34f0e15 commit 24c063d
Show file tree
Hide file tree
Showing 5 changed files with 379 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
---

# Example play: runs the kernel version enforcement tasks against the hosts
# of the `scylla` group, one host at a time (serial: 1).
- name: Kernel Version Enforcer
  hosts: scylla
  serial: 1
  gather_facts: true
  vars:
    # Scylla REST API endpoint and polling settings (drain / health checks).
    api_address: "127.0.0.1"
    api_port: 10000
    api_timeout: 300
    api_retries: 360
    api_delay: 10

    # Timeouts for the CQL port wait and for the reboot.
    cql_timeout: 86400
    reboot_timeout: 600

    # Retry settings for starting systemd units and for killing the Scylla PID.
    systemd_unit_retries: 5
    systemd_unit_delay: 30
    pid_kill_retries: 5
    pid_kill_delay: 12

    # Pause applied at the end of the per-node run.
    pause_time: 15

    # Kernel image selection.
    grub_config_file: /boot/grub/grub.cfg
    image_package_prefix: linux-image
    image_version: "5.15.0-1051-gcp"
    kernel_related_packages:
      - linux-gcp
      - linux-image-gcp
      - linux-headers-gcp

    # Feature toggles; every one of them is disabled by default.
    pin_kernel_version: false
    purge_older_images: false
    upgrade_all_packages: false
    upgrade_latest_kernel: false
  tasks:
    # Only Ubuntu is handled by this example.
    - name: Enforce kernel version for Ubuntu
      when: ansible_distribution == "Ubuntu"
      ansible.builtin.include_tasks: ubuntu/main.yml
30 changes: 30 additions & 0 deletions example-playbooks/kernel_version_enforcer/ubuntu/grub.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---

# Collect file metadata for the GRUB configuration file; the result is
# consumed by the existence check right below.
- name: Get {{ grub_config_file }} metadata
  register: grub_config
  ansible.builtin.stat:
    path: "{{ grub_config_file }}"

# Stop here when the configured GRUB file is missing on the host.
- name: Fail if GRUB config file doesn't exist
  when: not grub_config.stat.exists
  ansible.builtin.fail:
    msg: "{{ grub_config_file }} doesn't exist"

# List the `menuentry` lines that are preceded by a single whitespace
# character in the GRUB configuration file. The output lines are later
# enumerated to find the index of the wanted kernel.
- name: Get GRUB entries
  ansible.builtin.command: grep -E "^\smenuentry" {{ grub_config_file }}
  register: grub_entries
  # Read-only lookup: never report it as a change.
  changed_when: false

# Walk the collected GRUB entries and remember the position of the entry that
# mentions `image_version` and is not a "recovery mode" entry.
# The `target_grub_index is not defined` guard is meant to keep the first
# match only.
- name: Get GRUB index for '{{ image_package_prefix }}-{{ image_version }}'
  ansible.builtin.set_fact:
    target_grub_index: "{{ grub_index }}"
  when:
    - image_version in item
    - not "recovery mode" in item
    - target_grub_index is not defined
  loop: "{{ grub_entries.stdout_lines }}"
  loop_control:
    # Zero-based position of the current entry in the loop.
    index_var: grub_index

# Give a clear error when no GRUB entry matched the requested kernel instead
# of failing later on an undefined variable.
- name: Fail if no GRUB entry matches '{{ image_version }}'
  ansible.builtin.assert:
    that:
      - target_grub_index is defined
    fail_msg: "No GRUB menu entry found for '{{ image_version }}' in {{ grub_config_file }}"

# Select the entry for the next boot only (grub-reboot), addressing it as
# "1>N", where N is the index computed from the GRUB entries.
- name: Set index '1>{{ target_grub_index }}' to be used in the next reboot
  ansible.builtin.command: grub-reboot "1>{{ target_grub_index }}"
  become: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
---

# Remove (with purge) every kernel image package whose version is greater
# than the final one. Versions come from the vmlinuz files found in /boot.
- name: Purge all kernel images newer than '{{ final_image_version }}'
  become: true
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ item }}"
    purge: true
    state: absent
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when: item is version(final_image_version, '>')

# Delete the files left in /boot for those same newer versions.
# A shell is needed because of the glob.
- name: Erase all kernel images related files newer than '{{ final_image_version }}'
  become: true
  ansible.builtin.shell: rm -f /boot/*-{{ item }}
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when: item is version(final_image_version, '>')

# Optional cleanup (purge_older_images): remove (with purge) every kernel
# image package whose version is lower than the final one.
- name: Purge all kernel images older than '{{ final_image_version }}'
  become: true
  ansible.builtin.apt:
    name: "{{ image_package_prefix }}-{{ item }}"
    purge: true
    state: absent
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when:
    - purge_older_images
    - item is version(final_image_version, '<')

# Optional cleanup (purge_older_images): delete the /boot files that belong
# to those older versions. A shell is needed because of the glob.
- name: Erase all kernel images related files older than '{{ final_image_version }}'
  become: true
  ansible.builtin.shell: rm -f /boot/*-{{ item }}
  loop: "{{ vmlinuz_versions.stdout_lines }}"
  when:
    - purge_older_images
    - item is version(final_image_version, '<')

# Run apt autoclean after the purge steps.
- name: Remove useless packages from the cache
  become: true
  ansible.builtin.apt:
    autoclean: true

# Re-run the package configuration of the final kernel image,
# non-interactively and at "critical" priority.
- name: Reconfigure '{{ image_package_prefix }}-{{ final_image_version }}' package
  become: true
  ansible.builtin.command:
    cmd: dpkg-reconfigure {{ image_package_prefix }}-{{ final_image_version }} -f noninteractive -p critical

# Collect metadata for /boot/vmlinuz so the symlink target can be verified.
- name: Get /boot/vmlinuz metadata
  ansible.builtin.stat:
    path: /boot/vmlinuz
  register: vmlinuz

# Fail when /boot/vmlinuz is missing, is not a symlink, or points to a kernel
# other than the final one. The two checks are OR-ed: a symlink pointing at
# the wrong kernel must fail too. `lnk_source` is compared as a plain string,
# without nested templating in the condition.
- name: Fail if /boot/vmlinuz is not a symbolic link of /boot/vmlinuz-{{ final_image_version }}
  ansible.builtin.fail:
    msg: "/boot/vmlinuz is not a symbolic link of /boot/vmlinuz-{{ final_image_version }}"
  when: >-
    not (vmlinuz.stat.islnk | default(false))
    or (vmlinuz.stat.lnk_source | default('')) != ('/boot/vmlinuz-' ~ final_image_version)

# Collect metadata for /boot/initrd.img so the symlink target can be verified.
- name: Get /boot/initrd.img metadata
  ansible.builtin.stat:
    path: /boot/initrd.img
  register: initrd

# Fail when /boot/initrd.img is missing, is not a symlink, or points to an
# initrd other than the final one. The two checks are OR-ed: a symlink
# pointing at the wrong image must fail too. `lnk_source` is compared as a
# plain string, without nested templating in the condition.
- name: Fail if /boot/initrd.img is not a symbolic link of /boot/initrd.img-{{ final_image_version }}
  ansible.builtin.fail:
    msg: "/boot/initrd.img is not a symbolic link of /boot/initrd.img-{{ final_image_version }}"
  when: >-
    not (initrd.stat.islnk | default(false))
    or (initrd.stat.lnk_source | default('')) != ('/boot/initrd.img-' ~ final_image_version)

# Final apt cleanup: autoclean plus autoremove, forcing the use of apt-get.
- name: Clean all non-required packages
  become: true
  ansible.builtin.apt:
    force_apt_get: true
    autoremove: true
    autoclean: true
118 changes: 118 additions & 0 deletions example-playbooks/kernel_version_enforcer/ubuntu/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
---

# Ask the running system for its kernel release.
- name: Get current kernel image version
  ansible.builtin.command: uname --kernel-release
  register: uname_pre_output
  # Read-only query: never report it as a change.
  changed_when: false

# Keep the first output line as the detected kernel version.
- name: Save kernel image version
  ansible.builtin.set_fact:
    detected_image_version: "{{ uname_pre_output.stdout_lines | first }}"

# The kernel work is required when the running kernel differs from
# `image_version`, or when an upgrade to the latest kernel was requested.
- name: Define if the kernel image should be installed
  ansible.builtin.set_fact:
    kernel_image_required: "{{ image_version is version(detected_image_version, 'ne') or upgrade_latest_kernel }}"

# Set the dpkg selection of every kernel-related package to `install`, so the
# following apt tasks can change them. Only done when kernel work is required.
- name: Mark to unhold kernel-related packages
  become: true
  when: kernel_image_required
  ansible.builtin.dpkg_selections:
    selection: install
    name: "{{ item }}"
  loop: "{{ kernel_related_packages }}"

# Install the specific kernel image package, unless the run was asked to
# upgrade to the latest kernel instead.
- name: Ensure kernel image '{{ image_package_prefix }}-{{ image_version }}' is installed
  become: true
  when:
    - kernel_image_required
    - not upgrade_latest_kernel
  ansible.builtin.apt:
    state: present
    name: "{{ image_package_prefix }}-{{ image_version }}"

# Upgrade every kernel-related package to the latest available version.
# The whole list is handed to `name` in a single apt call: the apt module
# documentation recommends this over looping, which processes each package
# individually and repeats the cache update and cleanup per item.
- name: Upgrade kernel-related packages to the latest version available
  ansible.builtin.apt:
    name: "{{ kernel_related_packages }}"
    state: latest
    update_cache: true
    autoclean: true
    autoremove: true
    force_apt_get: true
  become: true
  when: upgrade_latest_kernel

# When pinning is requested, set the dpkg selection of every kernel-related
# package to `hold`.
- name: Mark to hold kernel-related packages
  become: true
  when: pin_kernel_version
  ansible.builtin.dpkg_selections:
    selection: hold
    name: "{{ item }}"
  loop: "{{ kernel_related_packages }}"

# Optional (upgrade_all_packages): bring every installed package to its
# latest version, refreshing the cache first and cleaning up afterwards.
- name: Upgrade all upgradable packages
  become: true
  when: upgrade_all_packages
  ansible.builtin.apt:
    name: "*"
    state: latest
    force_apt_get: true
    update_cache: true
    autoremove: true
    autoclean: true

# List the kernel versions present in /boot by stripping the
# "/boot/vmlinuz-" prefix from every vmlinuz file name.
- name: Get all vmlinuz files available
  ansible.builtin.shell: ls /boot/vmlinuz-* | sed 's/\/boot\/vmlinuz-*//'
  register: vmlinuz_versions
  # Read-only listing: never report it as a change.
  changed_when: false

# More than one vmlinuz file means several kernels are installed, so the
# reconfiguration / cleanup path has to run.
- name: Define if reconfiguration is required due to the presence of serveral vmlinuz files
  ansible.builtin.set_fact:
    reconfiguration_required: "{{ vmlinuz_versions.stdout_lines | length > 1 }}"

# Before the reconfiguration path runs, set the dpkg selection of every
# kernel-related package back to `install`.
- name: Mark to unhold kernel-related packages
  become: true
  when: reconfiguration_required
  ansible.builtin.dpkg_selections:
    selection: install
    name: "{{ item }}"
  loop: "{{ kernel_related_packages }}"

# The GRUB entry is selected explicitly only when a specific kernel version
# is being enforced (i.e. not when upgrading to the latest kernel).
- name: Prepare GRUB modifications
  when:
    - reconfiguration_required
    - not upgrade_latest_kernel
  ansible.builtin.include_tasks: grub.yml

# Stop the services, reboot and start them again, as implemented in
# stop_reboot_start.yml.
- name: Stop, reboot and start each node (if required)
  when: reconfiguration_required
  ansible.builtin.include_tasks: stop_reboot_start.yml

# When a specific version was requested, that version is the final one.
- name: Set final kernel image version if '{{ image_version }}' was installed
  when: not upgrade_latest_kernel
  ansible.builtin.set_fact:
    final_image_version: "{{ image_version }}"

# When upgrading to the latest kernel, the final version is the one saved as
# `target_image_version` by the stop_reboot_start.yml tasks.
- name: Set final kernel image version if the latest one was installed
  when:
    - reconfiguration_required
    - upgrade_latest_kernel
  ansible.builtin.set_fact:
    final_image_version: "{{ target_image_version }}"

# Remove the other kernel images and verify the /boot symlinks.
- name: Enforce kernel version '{{ final_image_version }}' usage
  when: reconfiguration_required
  ansible.builtin.include_tasks: kernel_enforce_cleanup.yml

# After the reconfiguration, put the kernel-related packages on hold again
# when pinning is requested.
- name: Mark to hold kernel-related packages
  become: true
  when:
    - pin_kernel_version
    - reconfiguration_required
  ansible.builtin.dpkg_selections:
    selection: hold
    name: "{{ item }}"
  loop: "{{ kernel_related_packages }}"

# Pause for `pause_time` seconds when kernel work was required on this host.
- name: Make a pause of {{ pause_time }} seconds
  when: kernel_image_required
  ansible.builtin.wait_for:
    timeout: "{{ pause_time | int }}"
122 changes: 122 additions & 0 deletions example-playbooks/kernel_version_enforcer/ubuntu/stop_reboot_start.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
---
# Gather the state of the system services into ansible_facts.services.
- name: Populate service facts
  ansible.builtin.service_facts:

# Scylla counts as installed when the scylla-server.service unit is present
# in the gathered service facts.
- name: Check if Scylla is installed
  ansible.builtin.set_fact:
    scylla_installation: "{{ ansible_facts.services['scylla-server.service'] is defined }}"

# Stop Scylla before the reboot:
#   1. mask the unit, 2. drain the node through the REST API and wait until
#   it reports DRAINED, 3. stop scylla-manager-agent (if present) and
#   scylla-server.
# If any step fails, the rescue section kills the Scylla process instead.
# The unit is always unmasked at the end.
# Runs only when Scylla is installed and kernel work is required.
- name: Stop Scylla
  block:
    - name: Mask scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        masked: true
      become: true

    # `until` is set explicitly so the retries/delay settings take effect;
    # older ansible-core releases ignore `retries` when `until` is missing.
    # NOTE(review): the original indentation was lost; `timeout` is applied
    # here as the uri module option - confirm it was not the task keyword.
    - name: Drain node
      ansible.builtin.uri:
        url: "http://{{ api_address }}:{{ api_port }}/storage_service/drain"
        method: POST
        timeout: "{{ api_timeout }}"
      register: node_drain_request
      until: node_drain_request is succeeded
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"

    # Poll the operation mode until it contains DRAINED. `default('')` keeps
    # the conditions evaluable when the request returned no JSON body.
    - name: Check if the node is fully drained
      ansible.builtin.uri:
        url: "http://{{ api_address }}:{{ api_port }}/storage_service/operation_mode"
        method: GET
        timeout: "{{ api_timeout }}"
      register: node_drain_status
      until: "'DRAINED' in (node_drain_status.json | default(''))"
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      failed_when: "'DRAINED' not in (node_drain_status.json | default(''))"

    - name: Stop scylla-manager-agent service (if exists)
      ansible.builtin.systemd:
        name: scylla-manager-agent
        enabled: true
        state: stopped
      become: true
      when: ansible_facts.services['scylla-manager-agent.service'] is defined

    - name: Stop scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        state: stopped
      become: true
      when: "'DRAINED' in node_drain_status.json"
  rescue:
    # Fallback: keep sending SIGKILL until the command returns rc 2.
    # NOTE(review): rc 2 is presumably what `kill` returns when `pidof`
    # finds no process and no PID is passed - confirm on the target shell.
    - name: Send a SIGKILL to Scylla PID
      ansible.builtin.shell: kill -9 $(pidof scylla)
      register: scylla_kill_pid
      retries: "{{ pid_kill_retries }}"
      delay: "{{ pid_kill_delay }}"
      until: scylla_kill_pid.rc == 2
      failed_when: scylla_kill_pid.rc != 2
      become: true
  always:
    - name: Unmask scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        masked: false
      become: true
  when:
    - scylla_installation
    - kernel_image_required

# Reboot the node and verify which kernel is running afterwards.
# Runs only when kernel work is required.
- name: Reboot and post-reboot checks
  block:
    - name: Reboot the node
      ansible.builtin.reboot:
        reboot_timeout: "{{ reboot_timeout }}"
      become: true

    # No shell features are needed, so the command module is used, as in the
    # pre-reboot detection. The query is read-only and never a change.
    - name: Get current kernel image version
      ansible.builtin.command: uname --kernel-release
      register: uname_post_output
      changed_when: false

    # The first output line is the kernel release now in use.
    - name: Save kernel image version
      ansible.builtin.set_fact:
        target_image_version: "{{ uname_post_output.stdout_lines | first }}"

    # When a specific version is enforced, the running kernel must match it.
    - name: Fail if kernel image version '{{ image_version }}' is not currently in use
      ansible.builtin.fail:
        msg: "'{{ image_version }}' is not currently used"
      when:
        - target_image_version is version(image_version, 'ne')
        - not upgrade_latest_kernel
  when: kernel_image_required

# Bring Scylla back after the reboot and wait until the node is healthy.
# Runs only when Scylla is installed and kernel work is required.
- name: Start Scylla
  block:
    # Read the value of `listen_address` from scylla.yaml (read-only).
    - name: Get listen address
      ansible.builtin.shell: grep '^listen_address:' /etc/scylla/scylla.yaml | awk '{ print $2 }'
      register: listen_address
      changed_when: false

    # Start the unit only when the gathered service facts report it as
    # "disabled". `until` is set explicitly so the retries/delay settings
    # take effect; older ansible-core releases ignore `retries` when `until`
    # is missing.
    - name: Start scylla-server service
      ansible.builtin.systemd:
        name: scylla-server
        state: started
      register: scylla_server_start
      until: scylla_server_start is succeeded
      retries: "{{ systemd_unit_retries }}"
      delay: "{{ systemd_unit_delay }}"
      become: true
      when:
        - ansible_facts.services['scylla-server.service'] is defined
        - ansible_facts.services['scylla-server.service'].status == "disabled"

    - name: Wait for CQL port on {{ listen_address.stdout }}
      ansible.builtin.wait_for:
        port: 9042
        host: "{{ listen_address.stdout }}"
        timeout: "{{ cql_timeout }}"

    # Poll `nodetool status` until the line for this node starts with "UN".
    # The check is read-only, so it is never a change.
    - name: Wait for the cluster to become healthy
      ansible.builtin.shell: nodetool status | grep "{{ listen_address.stdout }}" | grep '^UN'
      register: node_status
      until: node_status.rc == 0
      retries: "{{ api_retries }}"
      delay: "{{ api_delay }}"
      changed_when: false
  when:
    - scylla_installation
    - kernel_image_required

0 comments on commit 24c063d

Please sign in to comment.