tests/installed: Make reboot task less racy
This took a whole lot of experimentation. I hit upon the idea of doing a `systemctl stop sshd` to avoid the situation where we might ssh back into the system while it's still in the process of shutting down.

The other fix, ultimately, is disabling `ControlMaster`; see for example: https://github.com/ansible/ansible/issues/17935

Closes: #1548
Approved by: cgwalters
This commit is contained in:
parent
972a1921f5
commit
e5f6c9d1e2
|
|
@ -32,4 +32,6 @@ rpm -q standard-test-roles
|
||||||
|
|
||||||
export ANSIBLE_INVENTORY=${ANSIBLE_INVENTORY:-$(test -e inventory && echo inventory || echo /usr/share/ansible/inventory)}
|
export ANSIBLE_INVENTORY=${ANSIBLE_INVENTORY:-$(test -e inventory && echo inventory || echo /usr/share/ansible/inventory)}
|
||||||
ls -al /dev/kvm
|
ls -al /dev/kvm
|
||||||
|
# Sadly having this on makes the reboot playbook break
|
||||||
|
export ANSIBLE_SSH_ARGS='-o ControlMaster=no'
|
||||||
exec ansible-playbook --tags=atomic "$@"
|
exec ansible-playbook --tags=atomic "$@"
|
||||||
|
|
|
||||||
|
|
@ -33,34 +33,35 @@
|
||||||
command: cat /proc/sys/kernel/random/boot_id
|
command: cat /proc/sys/kernel/random/boot_id
|
||||||
register: orig_bootid
|
register: orig_bootid
|
||||||
|
|
||||||
|
# Stop sshd (thus preventing new connections) and kill our current user's
|
||||||
|
# connection so that we can't race to get back in to the system while it's
|
||||||
|
# shutting down
|
||||||
- name: restart hosts
|
- name: restart hosts
|
||||||
when: (not skip_shutdown is defined) or (not skip_shutdown)
|
when: (not skip_shutdown is defined) or (not skip_shutdown)
|
||||||
shell: sleep 3 && shutdown -r now
|
shell: |
|
||||||
|
systemctl stop sshd
|
||||||
|
systemd-run --on-active=5 systemctl reboot
|
||||||
async: 1
|
async: 1
|
||||||
poll: 0
|
poll: 0
|
||||||
ignore_errors: true
|
ignore_errors: true
|
||||||
|
|
||||||
# NB: The following tasks use local actions, so we need to explicitly ensure
|
# NB: The wait_for is executed locally and doesn't require privs, so avoid sudo
|
||||||
# that they don't use sudo, which may require a password, and is not necessary
|
- debug:
|
||||||
# anyway.
|
msg: "Waiting for reboot: {{ ansible_date_time.iso8601 }}"
|
||||||
|
- wait_for_connection:
|
||||||
|
delay: 5
|
||||||
|
timeout: 120
|
||||||
|
search_regex: "OpenSSH"
|
||||||
|
- debug:
|
||||||
|
msg: "SSH port is up {{ ansible_date_time.iso8601 }}"
|
||||||
|
|
||||||
- name: wait for hosts to come back up
|
- name: Assert that the bootid changed
|
||||||
local_action:
|
|
||||||
wait_for host={{ real_ansible_host }}
|
|
||||||
port={{ real_ansible_port | default('22') }}
|
|
||||||
state=started
|
|
||||||
delay=30
|
|
||||||
timeout={{ timeout }}
|
|
||||||
search_regex="OpenSSH"
|
|
||||||
become: false
|
|
||||||
|
|
||||||
# I'm not sure the retries are even necessary, but I'm keeping them in
|
|
||||||
- name: Wait until bootid changes
|
|
||||||
command: cat /proc/sys/kernel/random/boot_id
|
command: cat /proc/sys/kernel/random/boot_id
|
||||||
register: new_bootid
|
register: new_bootid
|
||||||
until: new_bootid.stdout != orig_bootid.stdout
|
until: new_bootid.stdout != orig_bootid.stdout
|
||||||
retries: 6
|
- assert:
|
||||||
delay: 10
|
that:
|
||||||
|
- new_bootid.stdout != orig_bootid.stdout
|
||||||
|
|
||||||
# provide an empty iterator when a list is not provided
|
# provide an empty iterator when a list is not provided
|
||||||
# http://docs.ansible.com/ansible/playbooks_conditionals.html#loops-and-conditionals
|
# http://docs.ansible.com/ansible/playbooks_conditionals.html#loops-and-conditionals
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue