---
# check_env.yaml
# Verifies that the workspace container is running on every host, then
# refreshes the config file, environment file and hostfile in the workspace.
# Play: stop early unless the workspace container is running on each host.
- name: Runtime Environment Check
  hosts: all
  # No facts are referenced by the tasks below.
  gather_facts: false
  # Zero tolerance: a failure on any host stops the play for all hosts.
  max_fail_percentage: 0
  vars:
    # Name of the docker container that is inspected.
    container: sb-workspace
  tasks:
    - name: Checking container status
      # argv form avoids a shell; the quoted Jinja literal renders the Go
      # template {{.State.Running}} that docker expects.
      command:
        argv:
          - docker
          - inspect
          - "--format={{ '{{.State.Running}}' }}"
          - '{{ container }}'
      register: result
      # Errors (e.g. the container does not exist) are turned into the
      # friendlier message of the next task instead of failing here.
      ignore_errors: true
      # Pure query: never report a change, and still run under --check so
      # that result.stdout is defined for the next task.
      changed_when: false
      check_mode: false
      become: true
    - name: Failing when container is not running
      fail:
        msg: Container {{ container }} is not running.
      # docker prints the literal string "true" for a running container.
      when: result is failed or result.stdout != "true"

# Play: push the config file, the environment file and the hostfile into
# the workspace directory of every host.
- name: Runtime Environment Update
  hosts: all
  # Facts are needed: ansible_user_dir and ansible_hostname are used below.
  gather_facts: true
  vars:
    # Workspace directory under the remote user's home.
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    container: sb-workspace
    # Sorted hostnames of all hosts known to the inventory.
    # NOTE(review): hostvars covers every inventory host, not only the hosts
    # of this run; with --limit the other hosts may have no gathered
    # ansible_hostname. Confirm this is intended.
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    # Content of the sb.env file. The rank of a node is its position in the
    # sorted hostname list, and the first hostname acts as master address.
    sb_env: |
      # pytorch env
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
      MASTER_ADDR={{ sb_nodes | first }}
      MASTER_PORT=29500
      OMP_NUM_THREADS=1
      # config env
      {{ env | default('') }}
  tasks:
    - name: Updating Config
      copy:
        # output_dir is not defined in this play; it must be supplied by the
        # caller (e.g. extra vars or inventory).
        src: '{{ output_dir }}/sb.config.yaml'
        dest: '{{ workspace }}/sb.config.yaml'
        # Quoted so the mode stays an octal string, not a YAML integer.
        mode: '0644'
      become: true
    - name: Updating Env Variables
      copy:
        content: '{{ sb_env }}'
        dest: '{{ item }}'
        mode: '0644'
      # The same content is written to the workspace and to /tmp.
      loop:
        - '{{ workspace }}/sb.env'
        - /tmp/sb.env
      become: true
    - name: Updating Hostfile
      copy:
        # One hostname per line, terminated by a final newline.
        content: "{{ sb_nodes | join('\n') }}\n"
        dest: '{{ workspace }}/hostfile'
        mode: '0644'
      become: true