---
# Play 1: verify on every host that the workspace container is running.
# Fact gathering is disabled for this play. `max_fail_percentage: 0` makes
# Ansible abort the play as soon as any host fails.
- name: Runtime Environment Check
  hosts: all
  gather_facts: false
  max_fail_percentage: 0
  vars:
    container: sb-workspace
    # `no_docker` is an optional external variable (defaults to false).
    # `| bool` normalizes string values such as "false" passed via `-e`,
    # which would otherwise be treated as truthy.
    skip: '{{ no_docker | default(false) | bool }}'
  tasks:
    - name: Checking container status
      # The outer Jinja expression emits the literal Go template
      # `{{.State.Running}}` so that docker, not Ansible, evaluates it.
      shell: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
      register: result
      # Errors are tolerated here; the next task reports the failure with an
      # explicit message.
      ignore_errors: true
      # The command only reads state, so never report it as a change.
      changed_when: false
      become: true
      when: not skip

    - name: Failing when container is not running
      fail:
        msg: Container {{ container }} is not running.
      # Both list entries must hold (they are AND-ed).
      when:
        - not skip
        - result is failed or result.stdout != "true"

# Play 2: push the config, environment file and hostfile into the workspace
# directory on every host, and write a copy of the hostfile to the local
# `output_dir`.
- name: Runtime Environment Update
  hosts: all
  gather_facts: true
  vars:
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    container: sb-workspace
    skip_docker: '{{ no_docker | default(false) | bool }}'
    # Sorted list of the `ansible_hostname` fact of every host in hostvars.
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    # Content of the generated env file. NODE_RANK is this host's position in
    # `sb_nodes`; MASTER_ADDR is the first entry of that sorted list.
    # NOTE(review): the `ansible.utils.index_of` lookup presumably requires
    # the `ansible.utils` collection to be installed - confirm.
    sb_env: |
      # sb env
      SB_WORKSPACE={{ workspace if skip_docker else '/root' }}
      # pytorch env
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
      MASTER_ADDR={{ sb_nodes | first }}
      MASTER_PORT=29500
      OMP_NUM_THREADS=1
      # config env
      {{ env | default('') }}
  tasks:
    - name: Ensure Workspace
      file:
        path: '{{ workspace }}'
        state: directory
        # Modes are quoted so they reach the module as octal strings.
        mode: '0755'

    - name: Updating Config
      copy:
        src: '{{ output_dir }}/sb.config.yaml'
        dest: '{{ workspace }}/sb.config.yaml'
        mode: '0644'
      become: true

    - name: Updating Env Variables
      copy:
        content: '{{ sb_env }}'
        dest: '{{ item }}'
        mode: '0644'
      # The same rendered content is written to both locations.
      with_items:
        - '{{ workspace }}/sb.env'
        - /tmp/sb.env
      become: true

    - name: Updating Hostfile to Remote
      copy:
        # One hostname per line, terminated by a trailing newline.
        content: "{{ sb_nodes | join('\n') }}\n"
        dest: '{{ workspace }}/hostfile'
        mode: '0644'
      become: true

    - name: Generating Hostfile to Local
      # Executed a single time, on the control node.
      delegate_to: localhost
      run_once: true
      copy:
        content: "{{ sb_nodes | join('\n') }}\n"
        dest: '{{ output_dir }}/hostfile'
        mode: '0644'