# check_env.yaml
---
# Play 1: verify that the expected Docker container is running on every host.
# Define `no_docker` as true (e.g. through extra vars) to skip the check.
- name: Runtime Environment Check
  hosts: all
  gather_facts: false
  # Zero tolerance: a failure on any host aborts the play.
  max_fail_percentage: 0
  vars:
    container: sb-workspace
    # Cast to a real boolean: a value passed as `-e no_docker=false` arrives
    # as a string, and a non-empty string would otherwise count as true.
    skip: '{{ no_docker | default(false) | bool }}'
  tasks:
    - name: Checking container status
      # Read-only query, so no shell is needed and it never reports "changed".
      # The quoted Jinja literal emits the Go template for State.Running
      # verbatim instead of letting Ansible try to render it.
      command: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
      register: result
      # Keep going on error; the next task reports a clearer message.
      ignore_errors: true
      changed_when: false
      become: true
      when: not skip

    - name: Failing when container is not running
      fail:
        msg: 'Container {{ container }} is not running.'
      # Fails when the inspect command itself failed or printed anything
      # other than "true".
      when: (not skip) and (result is failed or result.stdout != "true")

# Play 2: create the workspace directory on every host and write the
# config file, the environment file and the hostfile into it.
- name: Runtime Environment Update
  hosts: all
  # Facts are needed: ansible_user_dir and ansible_hostname are used below.
  gather_facts: true
  vars:
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    container: sb-workspace
    # Sorted hostnames taken from every entry in hostvars; the position of a
    # host in this list becomes its NODE_RANK below.
    # NOTE(review): hosts without gathered facts (e.g. excluded by --limit)
    # have no ansible_hostname -- confirm that case cannot occur here.
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    # Content of the env file written by the "Updating Env Variables" task.
    # `env` is not defined in this play; it defaults to an empty string.
    sb_env: |
      # sb env
      HOST_WS={{ ansible_user_dir }}/sb-workspace
      # pytorch env
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
      MASTER_ADDR={{ sb_nodes | first }}
      MASTER_PORT=29500
      OMP_NUM_THREADS=1
      # config env
      {{ env | default('') }}
  tasks:
    - name: Ensure Workspace
      file:
        path: '{{ workspace }}'
        state: directory
        # Quoted so the mode is passed as an octal string, not a YAML integer.
        mode: '0755'

    - name: Updating Config
      copy:
        # `output_dir` is not defined in this play; it must be supplied
        # by the caller.
        src: '{{ output_dir }}/sb.config.yaml'
        dest: '{{ workspace }}/sb.config.yaml'
        mode: '0644'
      become: true

    - name: Updating Env Variables
      copy:
        content: '{{ sb_env }}'
        dest: '{{ item }}'
        mode: '0644'
      # The same env content is written to the workspace and to /tmp.
      loop:
        - '{{ workspace }}/sb.env'
        - /tmp/sb.env
      become: true

    - name: Updating Hostfile
      copy:
        # One hostname per line, with a trailing newline.
        content: "{{ sb_nodes | join('\n') }}\n"
        dest: '{{ workspace }}/hostfile'
        mode: '0644'
      become: true