diff --git a/.ansible-lint b/.ansible-lint
index d8dc293..c1fba50 100644
--- a/.ansible-lint
+++ b/.ansible-lint
@@ -5,3 +5,4 @@ skip_list:
   - no-changed-when
   - run-once[play]
   - name[template]
+  - jinja[spacing]
diff --git a/.gitignore b/.gitignore
index 6be675f..18a4ed6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,4 @@ filter_plugins/*.bak
 python/
 collections/ansible_collections
 roles/autofsck
+*.bak
diff --git a/bigboot-noop.yml b/bigboot-noop.yml
index e33fd4d..8e31f6b 100644
--- a/bigboot-noop.yml
+++ b/bigboot-noop.yml
@@ -18,79 +18,17 @@
 
     - name: Capture logical volume information
       ansible.builtin.import_tasks: tasks/capture_lv_device_details.yml
-
-- name: Perform a ReaR backup if any disk modifications are to be made
-  ansible.builtin.import_playbook: rhc.rear.rear_backup
-  when:
-    - bigboot_execute_bigboot | default('false') | bool
-    - not bigboot_skip_rear_backup | default('true') | bool
-
-
-- name: Perform logical volume and boot parition resizing as needed
-  hosts: all
-  become: true
-  gather_facts: true
-  strategy: free
-
-  vars_files:
-    - bigboot_vars.yml
-
-  tasks:
-    - name: Perform service and filesystem checks prior to Bigboot execution
-      when:
-        - (bigboot_execute_shrink_lv | bool or bigboot_execute_bigboot | bool)
-      block:
-        # - name: Check for and disable services exceeding the timeout threshold
-        #   ansible.builtin.import_tasks: tasks/check_systemd_services.yml
-
-        - name: Enable Grub filesystem check
-          ansible.builtin.import_role:
-            name: autofsck
-            tasks_from: main.yml
-
-        - name: Flush handlers
-          ansible.builtin.meta: flush_handlers
-
-        # Make sure to update the reboot code for the WF environment
-        - name: Reboot to run filesystem checks
-          ansible.builtin.reboot:
-
-        - name: Disable Grub filesystem check
-          ansible.builtin.import_role:
-            name: autofsck
-            tasks_from: cleanup.yml
-
-        - name: Flush handlers
-          ansible.builtin.meta: flush_handlers
-
-
-    - name: Extend the timeout values for physical hosts
-      ansible.builtin.set_fact:
-        initramfs_post_reboot_delay: 300
-        initramfs_reboot_timeout: 14400
-      when:
-        - "'host' in ansible_virtualization_role"
-
-    - name: Shrink the logical volume to support /boot expansion
-      ansible.builtin.debug:
-        msg:
-          - "device: {{ bigboot_adjacent_lvm_device }}"
-          - "size : {{ bigboot_lv_shrink_size | int }}"
-      when:
-        - bigboot_execute_shrink_lv | bool
-
-    - name: Expand the /boot partition as requested
-      ansible.builtin.debug:
-        msg: "{{ bigboot_size }}"
-      when:
-        - bigboot_execute_bigboot | bool
-
-    # - name: Re-enabling services previously disabled
-    #   ansible.builtin.service:
-    #     name: "{{ item }}"
-    #     state: started
-    #     enabled: true
-    #   loop: "{{ bigboot_systemd_disabled_services }}"
-    #   when:
-    #     - bigboot_systemd_disabled_services is defined
-    #     - bigboot_systemd_disabled_services | length > 0
+    - name: Set environment for subsequent workflow nodes
+      ansible.builtin.set_stats:
+        data:
+          bigboot_data: "{{ bigboot_data | default({}) |
+            combine({inventory_hostname:
+              {
+                'bigboot_execute_bigboot': bigboot_execute_bigboot,
+                'bigboot_execute_shrink_lv': bigboot_execute_shrink_lv,
+                'bigboot_adjacent_lvm_device': bigboot_adjacent_lvm_device,
+                'bigboot_lv_shrink_size': bigboot_lv_shrink_size | int,
+                'bigboot_size': bigboot_size,
+                'bigboot_skip_rear_backup': bigboot_skip_rear | default('false')
+              }
+            })}}"
diff --git a/bigboot_execute_resize.yml b/bigboot_execute_resize.yml
index e254548..8da3313 100644
--- a/bigboot_execute_resize.yml
+++ b/bigboot_execute_resize.yml
@@ -9,33 +9,10 @@
     - bigboot_vars.yml
 
   tasks:
-    - name: Perform service and filesystem checks prior to Bigboot execution
+    - name: Perform filesystem check prior to Bigboot execution
+      ansible.builtin.import_tasks: tasks/grub_filesystem_check.yml
       when:
-        - (bigboot_execute_shrink_lv | bool or bigboot_execute_bigboot | bool)
-      block:
-        # - name: Check for and disable services exceeding the timeout threshold
-        #   ansible.builtin.import_tasks: tasks/check_systemd_services.yml
-
-        - name: Enable Grub filesystem check
-          ansible.builtin.import_role:
-            name: autofsck
-            tasks_from: main.yml
-
-        - name: Flush handlers
-          ansible.builtin.meta: flush_handlers
-
-        # Make sure to update the reboot code for the WF environment
-        - name: Reboot to run filesystem checks
-          ansible.builtin.reboot:
-
-        - name: Disable Grub filesystem check
-          ansible.builtin.import_role:
-            name: autofsck
-            tasks_from: cleanup.yml
-
-        - name: Flush handlers
-          ansible.builtin.meta: flush_handlers
-
+        - bigboot_data[inventory_hostname]['bigboot_execute_bigboot'] | default(false) | bool
 
     - name: Extend the timeout values for physical hosts
       ansible.builtin.set_fact:
@@ -49,23 +26,18 @@
         name: infra.lvm_snapshots.shrink_lv
       vars:
         shrink_lv_devices:
-          - device: "{{ bigboot_adjacent_lvm_device }}"
-            size: "{{ bigboot_lv_shrink_size | int }}"
+          - device: "{{ bigboot_data[inventory_hostname]['bigboot_adjacent_lvm_device'] }}"
+            size: "{{ bigboot_data[inventory_hostname]['bigboot_lv_shrink_size'] | int }}"
       when:
-        - bigboot_execute_shrink_lv | bool
+        - bigboot_data[inventory_hostname]['bigboot_execute_shrink_lv'] | bool
 
     - name: Expand the /boot partition as requested
       ansible.builtin.import_role:
         name: infra.lvm_snapshots.bigboot
+      vars:
+        bigboot_size: "{{ bigboot_data[inventory_hostname]['bigboot_size'] }}"
       when:
-        - bigboot_execute_bigboot | bool
+        - bigboot_data[inventory_hostname]['bigboot_execute_bigboot'] | bool
 
-    # - name: Re-enabling services previously disabled
-    #   ansible.builtin.service:
-    #     name: "{{ item }}"
-    #     state: started
-    #     enabled: true
-    #   loop: "{{ bigboot_systemd_disabled_services }}"
-    #   when:
-    #     - bigboot_systemd_disabled_services is defined
-    #     - bigboot_systemd_disabled_services | length > 0
+    - name: Restore service state for disabled services
+      ansible.builtin.import_tasks: tasks/restore_services.yml
diff --git a/bigboot_rear_backup.yml b/bigboot_rear_backup.yml
index ffde0d6..27e1b10 100644
--- a/bigboot_rear_backup.yml
+++ b/bigboot_rear_backup.yml
@@ -1,6 +1,6 @@
 ---
-- name: Perform a ReaR backup if any disk modifications are to be made
+- name: Perform a ReaR backup before the /boot expansion
   ansible.builtin.import_playbook: rhc.rear.rear_backup
   when:
-    - bigboot_execute_bigboot | default('false') | bool
-    - not bigboot_skip_rear_backup | default('true') | bool
+    - bigboot_data[inventory_hostname]['bigboot_execute_bigboot'] | default(false) | bool
+    - not rear_backup_skip | default(false) | bool
diff --git a/bigboot_rear_nfs_export.yml b/bigboot_rear_nfs_export.yml
new file mode 100644
index 0000000..e521e42
--- /dev/null
+++ b/bigboot_rear_nfs_export.yml
@@ -0,0 +1,20 @@
+---
+# Runs against the rear_server group: loops over every host entry in
+# bigboot_data and includes tasks/rear_nfs_exports.yml for the flagged ones.
+- name: Add Bigboot hosts to the ReaR server NFS exports
+  hosts: rear_server
+  become: true
+  gather_facts: false
+
+  vars_files:
+    - bigboot_vars.yml
+
+  tasks:
+    - name: Create IP list and add to NFS exports
+      ansible.builtin.include_tasks: tasks/rear_nfs_exports.yml
+      loop: "{{ bigboot_data | dict2items }}"
+      loop_control:
+        label: "{{ item['key'] }}"
+      when:
+        - item['value']['bigboot_execute_bigboot'] | default(false) | bool
+        - not rear_backup_skip | default(false) | bool
diff --git a/bigboot_setup_environment.yml b/bigboot_setup_environment.yml
index 8cb11f9..466f037 100644
--- a/bigboot_setup_environment.yml
+++ b/bigboot_setup_environment.yml
@@ -12,22 +12,35 @@
     - name: Cleanup from any previous executions
       ansible.builtin.import_tasks: tasks/cleanup.yml
 
-    - name: Capture boot device details
+    - name: Check for services that require being disabled
+      ansible.builtin.import_tasks: tasks/check_services.yml
+
+    - name: Set boot device details
       ansible.builtin.import_tasks: tasks/capture_boot_device_details.yml
 
-    - name: Capture logical volume information
+    - name: Set logical volume information
       ansible.builtin.import_tasks: tasks/capture_lv_device_details.yml
 
+    - name: Run pre-checks to verify environment
+      ansible.builtin.import_tasks: tasks/pre-checks.yml
+      when:
+        - bigboot_execute_bigboot | bool
+
     - name: Set environment for subsequent workflow nodes
       ansible.builtin.set_stats:
+        aggregate: true
         data:
-          bigboot_execute_bigboot: "{{ bigboot_execute_bigboot }}"
-          bigboot_execute_shrink_lv: "{{ bigboot_execute_shrink_lv }}"
-          bigboot_adjacent_lvm_device: "{{ bigboot_adjacent_lvm_device }}"
-          bigboot_lv_shrink_size: "{{ bigboot_lv_shrink_size | int }}"
-          bigboot_partition_size: "{{ bigboot_partition_size }}"
-          bigboot_skip_rear_backup: "{{ bigboot_skip_rear | default('false') }}"
-        per_host: false
-        aggregate: false
+          bigboot_data: "{{ bigboot_data | default({}) |
+            combine({inventory_hostname:
+              {
+                'bigboot_execute_bigboot': bigboot_execute_bigboot,
+                'bigboot_execute_shrink_lv': bigboot_execute_shrink_lv,
+                'bigboot_adjacent_lvm_device': bigboot_adjacent_lvm_device,
+                'bigboot_lv_shrink_size': bigboot_lv_shrink_size | int,
+                'bigboot_size': bigboot_size,
+                'ip_addresses': ansible_all_ipv4_addresses,
+                'server_hostname': ansible_hostname
+              }
+            }) }}"
 
 ...
diff --git a/tasks/check_services.yml b/tasks/check_services.yml
new file mode 100644
index 0000000..dd44d9e
--- /dev/null
+++ b/tasks/check_services.yml
@@ -0,0 +1,55 @@
+---
+- name: Ensure service facts are available
+  ansible.builtin.service_facts:
+
+- name: Capture a list of running services
+  ansible.builtin.set_fact:
+    bigboot_systemd_running_services:
+      "{{ bigboot_systemd_running_services | default([]) + [item['key']] }}"
+  loop: "{{ ansible_facts['services'] | dict2items }}"
+  loop_control:
+    label: "{{ item['key'] }}"
+  when:
+    - "'running' in item['value']['state']"
+
+- name: Get the stop timeout value for running services
+  ansible.builtin.shell:
+    cmd: |
+      set -o pipefail
+      systemctl show {{ item }} | grep TimeoutStopUSec
+  changed_when: false
+  register: bigboot_systemd_service_timeout
+  loop: "{{ bigboot_systemd_running_services }}"
+
+- name: Adding services exceeding the timeout threshold to the list of services to disable
+  ansible.builtin.set_fact:
+    bigboot_services_disabled: "{{ bigboot_services_disabled | default([]) + [item['item']] }}"
+  loop: "{{ bigboot_systemd_service_timeout['results'] }}"
+  loop_control:
+    label: "{{ item['item'] }}"
+  when:
+    - item['item'] not in bigboot_protected_services
+    - item['stdout'] | regex_replace('^.*=(.*$)', '\\1') | community.general.to_minutes >= bigboot_service_max_timeout | int
+
+- name: Adding incompatible services to the list of services to disable
+  ansible.builtin.set_fact:
+    bigboot_services_disabled: "{{ bigboot_services_disabled | default([]) + [item] }}"
+  loop: "{{ bigboot_incompatible_services }}"
+  when:
+    - ansible_facts['services'][item] is defined
+    - ansible_facts['services'][item]['state'] == "running"
+
+- name: Log and disable services
+  when:
+    - bigboot_services_disabled is defined
+    - bigboot_services_disabled | length > 0
+  block:
+    - name: Disable services and log their state
+      ansible.builtin.include_tasks: tasks/disable_systemd_services.yml
+      loop: "{{ bigboot_services_disabled }}"
+
+    - name: Services disabled notice
+      ansible.builtin.debug:
+        msg: >-
+          The following services were disabled, and will be re-enabled post
+          Bigboot execution: {{ bigboot_services_disabled | join(', ') }}
diff --git a/tasks/check_systemd_services.yml b/tasks/check_systemd_services.yml
deleted file mode 100644
index 2c29381..0000000
--- a/tasks/check_systemd_services.yml
+++ /dev/null
@@ -1,31 +0,0 @@
----
-- name: Get the list of services on the host
-  ansible.builtin.service_facts:
-
-- name: Capture a list of running services
-  ansible.builtin.set_fact:
-    bigboot_systemd_running_services:
-      "{{ bigboot_systemd_running_services | default([]) + [item['key']] }}"
-  loop: "{{ ansible_facts['services'] | dict2items }}"
-  loop_control:
-    label: "{{ item['key'] }}"
-  when:
-    - "'running' in item['value']['state']"
-
-- name: Get the stop timeout value for running services
-  ansible.builtin.shell:
-    cmd: |
-      set -o pipefail
-      systemctl show {{ item }} | grep TimeoutStopUSec
-  changed_when: false
-  register: bigboot_systemd_service_timeout
-  loop: "{{ bigboot_systemd_running_services }}"
-
-- name: Disabling services exceeding the timeout threshold
-  ansible.builtin.include_tasks: tasks/disable_systemd_services.yml
-  loop: "{{ bigboot_systemd_service_timeout['results'] }}"
-  loop_control:
-    label: "{{ item['item'] }}"
-  when:
-    - item['item'] not in bigboot_protected_services
-    - item['stdout'] | regex_replace('^.*=(.*$)', '\\1') | community.general.to_minutes > bigboot_service_max_timeout
diff --git a/tasks/cleanup.yml b/tasks/cleanup.yml
index 222f0e0..1e30e8f 100644
--- a/tasks/cleanup.yml
+++ b/tasks/cleanup.yml
@@ -29,4 +29,14 @@
     path: "/boot/initramfs-{{ initramfs_kernel_version }}.img.{{ initramfs_backup_extension }}"
     state: absent
 
+- name: Check for Bigboot state log and restore services to pre-Bigboot state
+  ansible.builtin.import_tasks: tasks/restore_services.yml
+
+- name: Cleanup previous Bigboot state log if present
+  ansible.builtin.file:
+    path: "{{ bigboot_disabled_services_log }}"
+    state: absent
+  when:
+    - bigboot_disabled_services_log_stat['stat']['exists'] | bool
+
 ...
diff --git a/tasks/disable_systemd_services.yml b/tasks/disable_systemd_services.yml
index d90a9f4..4c21418 100644
--- a/tasks/disable_systemd_services.yml
+++ b/tasks/disable_systemd_services.yml
@@ -1,11 +1,23 @@
 ---
-- name: Disabling service for exceeding the timeout threshold
+- name: "Save service state: {{ item }}"
+  ansible.builtin.set_fact:
+    bigboot_service_to_disable:
+      service: "{{ ansible_facts['services'][item]['name'] }}"
+      state: "{{ (ansible_facts['services'][item]['state'] == 'running') | ternary('started', 'stopped') }}"
+      status: "{{ (ansible_facts['services'][item]['status'] == 'enabled') | ternary('true', 'false') }}"
+
+- name: "Disable and stop service: {{ item }}"
   ansible.builtin.service:
-    name: "{{ item['item'] }}"
+    name: "{{ item }}"
     state: stopped
     enabled: false
 
-- name: Append service to list of disabled services
-  ansible.builtin.set_fact:
-    bigboot_systemd_disabled_services:
-      "{{ bigboot_systemd_disabled_services | default([]) + [item['item']] }}"
+- name: "Log disabled service state: {{ item }}"
+  ansible.builtin.lineinfile:
+    path: "{{ bigboot_disabled_services_log }}"
+    line: "{{ item }},{{ bigboot_service_to_disable['state'] }},{{ bigboot_service_to_disable['status'] }}"
+    create: true
+    state: present
+    owner: root
+    group: root
+    mode: "0600"
diff --git a/tasks/grub_filesystem_check.yml b/tasks/grub_filesystem_check.yml
new file mode 100644
index 0000000..a1ee48d
--- /dev/null
+++ b/tasks/grub_filesystem_check.yml
@@ -0,0 +1,30 @@
+---
+- name: Enable Grub filesystem check
+  ansible.builtin.import_role:
+    name: autofsck
+    tasks_from: main.yml
+
+- name: Flush handlers to add Grub parameters for fsck
+  ansible.builtin.meta: flush_handlers
+
+# Make sure to update the reboot code for the WF environment
+# - name: Reboot to run filesystem checks
+#   ansible.builtin.reboot:
+
+- name: Reboot to run filesystem checks
+  ansible.builtin.command: /sbin/shutdown -r +1
+
+- name: Wait for the reboot to complete
+  ansible.builtin.wait_for_connection:
+    connect_timeout: 20
+    sleep: 10
+    delay: "{{ bigboot_post_reboot_delay | default('70') }}"
+    timeout: "{{ bigboot_reboot_timeout | default('1800') }}"
+
+- name: Disable Grub filesystem check
+  ansible.builtin.import_role:
+    name: autofsck
+    tasks_from: cleanup.yml
+
+- name: Flush handlers to remove Grub parameters for fsck
+  ansible.builtin.meta: flush_handlers
diff --git a/tasks/pre-checks.yml b/tasks/pre-checks.yml
new file mode 100644
index 0000000..759bc56
--- /dev/null
+++ b/tasks/pre-checks.yml
@@ -0,0 +1,31 @@
+---
+- name: Get /boot mount information
+  ansible.builtin.set_fact:
+    bigboot_boot_mount: "{{ ansible_facts.mounts \
+      | selectattr('mount', 'equalto', '/boot') | first }}"
+
+- name: Set next partition after /boot
+  ansible.builtin.set_fact:
+    bigboot_next_device: "{{ bigboot_boot_mount['device'][:-1] }}{{ bigboot_boot_mount['device'][-1:] | int + 1 }}"
+
+- name: Capture partition information from fdisk
+  ansible.builtin.shell:
+    cmd: |
+      set -o pipefail
+      fdisk -l {{ bigboot_boot_mount['device'][:-1] }} | grep '{{ bigboot_next_device }}'
+    executable: /bin/bash
+  changed_when: false
+  # rc 1 (grep selected no line) is tolerated so that the assert at the end of
+  # this file reports the failure with its fail_msg; 141 is 128 + SIGPIPE.
+  failed_when: bigboot_fdisk_partition['rc'] not in [0, 1, 141]
+  register: bigboot_fdisk_partition
+
+- name: Debug bigboot_fdisk_partition
+  ansible.builtin.debug:
+    var: bigboot_fdisk_partition
+
+- name: Assert that the partition following /boot is of type LVM
+  ansible.builtin.assert:
+    that: "'Linux LVM' in bigboot_fdisk_partition['stdout']"
+    success_msg: The partition following /boot is an LVM partition
+    fail_msg: The partition following /boot is NOT an LVM partition. Execution halted.
diff --git a/tasks/rear_nfs_exports.yml b/tasks/rear_nfs_exports.yml
new file mode 100644
index 0000000..c1013e2
--- /dev/null
+++ b/tasks/rear_nfs_exports.yml
@@ -0,0 +1,9 @@
+---
+- name: Create list of IP addresses and hostnames
+  ansible.builtin.set_fact:
+    client_ips: "{{ item['value']['ip_addresses'] | list | flatten }}"
+    namehost: "{{ item['value']['server_hostname'] }}"
+
+- name: Include NFS export role
+  ansible.builtin.include_role:
+    name: rhc.rear.say_hi
diff --git a/tasks/restore_services.yml b/tasks/restore_services.yml
new file mode 100644
index 0000000..4d0c915
--- /dev/null
+++ b/tasks/restore_services.yml
@@ -0,0 +1,25 @@
+---
+- name: Check for Bigboot service state log presence
+  ansible.builtin.stat:
+    path: "{{ bigboot_disabled_services_log }}"
+  register: bigboot_disabled_services_log_stat
+
+- name: Read state log and restore service state
+  when:
+    - bigboot_disabled_services_log_stat['stat']['exists'] | bool
+  block:
+    - name: Read service state from log
+      community.general.read_csv:
+        path: "{{ bigboot_disabled_services_log }}"
+        fieldnames: service,state,enabled
+        delimiter: ','
+      register: bigboot_service_state_contents
+
+    - name: Restore service state
+      ansible.builtin.service:
+        name: "{{ item['service'] }}"
+        state: "{{ item['state'] }}"
+        enabled: "{{ item['enabled'] | bool }}"
+      loop: "{{ bigboot_service_state_contents['list'] }}"
+      loop_control:
+        label: "{{ item['service'] }}"
diff --git a/vars/bigboot_vars.yml b/vars/bigboot_vars.yml
index af7317e..02ad1c4 100644
--- a/vars/bigboot_vars.yml
+++ b/vars/bigboot_vars.yml
@@ -8,11 +8,22 @@ bigboot_reboot_timeout: 1800
 
 bigboot_skip_rear_backup: false
 
-# Max value in minutes for the timeout threshold:
-bigboot_service_max_timeout: 2
+# Stop-timeout threshold in minutes; services at or above it are disabled:
+bigboot_service_max_timeout: 5
+
+# List of services incompatible with calculations
+# to obtain required disk information:
+#
+# (These services will ALWAYS be disabled)
+bigboot_incompatible_services:
+  - docker.service
+  - named-chroot.service
 
 # List of services which will be excluded from being
 # disabled during Bigboot execution:
+#
+# (Services listed in `bigboot_incompatible_services`
+# will ALWAYS be disabled, regardless of whether they are protected)
 bigboot_protected_services:
   - sshd.service
   - user@0.service
@@ -20,3 +31,7 @@ bigboot_protected_services:
   - rhnsd.service
   - rhnsd
   - boksm.service
+  - SplunkForwarder.service
+
+# Full path of the disabled services log:
+bigboot_disabled_services_log: /var/ipe/ipu/el7to8/bigboot_disabled_services.log