check_env.yaml 2.06 KB
Newer Older
1
2
3
4
5
6
# Play: verify on every host that the workspace container is running.
# Passing a true value for `no_docker` skips both tasks.
- name: Runtime Environment Check
  hosts: all
  gather_facts: false
  # With a threshold of 0, any failed host aborts the play.
  max_fail_percentage: 0
  vars:
    container: sb-workspace
    # `| bool` normalises string input such as `-e no_docker=false`; without it
    # the non-empty string "false" is truthy and the check is silently skipped.
    skip: '{{ no_docker | default(false) | bool }}'
  tasks:
    - name: Checking container status
      # No shell features are required, so invoke docker directly.
      # The inner quoted Jinja literal emits Go-template braces verbatim.
      command: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
      register: result
      # Read-only query: never report the host as changed.
      changed_when: false
      # A failed inspect is evaluated by the following task instead.
      ignore_errors: true
      become: true
      when: not skip

    - name: Failing when container is not running
      fail:
        msg: Container {{ container }} is not running.
      # `not skip` is tested first so `result.stdout` is only read when the
      # inspect task actually ran.
      when: (not skip) and (result is failed or result.stdout != "true")
18
19
20
21
22
23
24

# Play: create the workspace on every host and distribute config, env file
# and hostfile.
- name: Runtime Environment Update
  hosts: all
  # Facts are needed for `ansible_user_dir` and `ansible_hostname` below.
  gather_facts: true
  vars:
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    container: sb-workspace
    # `| bool` normalises string input such as `-e no_docker=false`; without it
    # the non-empty string "false" is truthy and SB_WORKSPACE gets the wrong path.
    skip_docker: '{{ no_docker | default(false) | bool }}'
    # Sorted `ansible_hostname` of every entry in hostvars; the order defines
    # NODE_RANK and MASTER_ADDR below.
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    # Rendered contents of the env file. Everything inside the block scalar,
    # including the `#` lines, is literal file content.
    sb_env: |
      # sb env
      SB_WORKSPACE={{ workspace if skip_docker else '/root' }}
      # pytorch env
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
      MASTER_ADDR={{ sb_nodes | first }}
      MASTER_PORT=29500
      OMP_NUM_THREADS=1
      # config env
      {{ env | default('') }}
38
  tasks:
39
40
41
42
43
    # Create the workspace directory if it does not exist yet.
    - name: Ensure Workspace
      file:
        path: '{{ workspace }}'
        state: directory
        # Quoted so the mode is always passed as an octal string, independent
        # of how the YAML parser types a leading-zero number.
        mode: '0755'
44
45
46
47
48
49
50
51
52
    # Copy sb.config.yaml from `output_dir` into the workspace.
    # NOTE(review): `output_dir` is not defined in this file; presumably it is
    # supplied by the caller - confirm.
    - name: Updating Config
      copy:
        src: '{{ output_dir }}/sb.config.yaml'
        dest: '{{ workspace }}/sb.config.yaml'
        # Quoted so the mode is always passed as an octal string.
        mode: '0644'
      become: true
    # Write the rendered `sb_env` text to each destination listed in the loop.
    - name: Updating Env Variables
      copy:
        content: '{{ sb_env }}'
        dest: '{{ item }}'
        # Quoted so the mode is always passed as an octal string.
        mode: '0644'
      # `loop` is the current equivalent of `with_items` for a flat list.
      loop:
        - '{{ workspace }}/sb.env'
        - /tmp/sb.env
      become: true
59
    # Write one node name per line, newline-terminated, into the workspace.
    - name: Updating Hostfile to Remote
      copy:
        # Double quotes are required so YAML expands `\n` into real newlines.
        content: "{{ sb_nodes | join('\n') }}\n"
        dest: '{{ workspace }}/hostfile'
        # Quoted so the mode is always passed as an octal string.
        mode: '0644'
      become: true
65
66
67
68
69
70
71
    # Write the same hostfile once on the control node, under `output_dir`.
    # NOTE(review): `output_dir` is not defined in this file; presumably it is
    # supplied by the caller - confirm.
    - name: Generating Hostfile to Local
      delegate_to: localhost
      # The content is identical for every host, so a single run is enough.
      run_once: true
      copy:
        # Double quotes are required so YAML expands `\n` into real newlines.
        content: "{{ sb_nodes | join('\n') }}\n"
        dest: '{{ output_dir }}/hostfile'
        # Quoted so the mode is always passed as an octal string.
        mode: '0644'