Unverified Commit c05e173b authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

Runner - Implement ansible client and runner (#69)

Implement ansible client and runner:
* add ansible client
* add deploy and check_env playbooks
parent e977bbc1
......@@ -8,8 +8,12 @@ container:
image: python:3.7
steps:
- script: |
echo "##vso[task.prependpath]$HOME/.local/bin"
displayName: Export path
- script: |
python3 -m pip install .[test,torch]
make postinstall
displayName: Install dependencies
- script: |
python3 setup.py lint
......
......@@ -12,8 +12,12 @@ container:
image: nvcr.io/nvidia/pytorch:20.12-py3
steps:
- script: |
echo "##vso[task.prependpath]$HOME/.local/bin"
displayName: Export path
- script: |
python3 -m pip install .[test,torch]
make postinstall
displayName: Install dependencies
- script: |
SB_MICRO_PATH=$PWD/bin make cppbuild
......
......@@ -3,7 +3,7 @@
CPPSOURCES := $(shell find $(CURDIR) -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)')
.PHONY: cpplint cppformat
.PHONY: cpplint cppformat postinstall
cpplint:
clang-format --verbose --dry-run --Werror $(CPPSOURCES)
......@@ -13,3 +13,10 @@ cppformat:
cppbuild:
cd ./superbench/benchmarks/ && bash build.sh
# postinstall: install the Ansible Galaxy collections `ansible.utils` and
# `community.crypto` after the Python package itself has been installed.
postinstall:
# `which ansible-galaxy` prints nothing when the executable is not on PATH;
# in that case stop with an explicit error instead of running the install.
ifeq ($(shell which ansible-galaxy),)
$(error 'Cannot find ansible-galaxy')
else
ansible-galaxy collection install ansible.utils community.crypto
endif
......@@ -104,6 +104,7 @@ __Install SuperBench__
# install superbench
python3 -m pip install .
make postinstall
```
......
......@@ -133,6 +133,8 @@ def run(self):
packages=find_packages(exclude=['tests']),
python_requires='>=3.6, <4',
install_requires=[
'ansible_base>=2.10.9;os_name=="posix"',
'ansible_runner>=1.4.7',
'colorlog>=4.7.2',
'knack>=0.7.2',
'omegaconf>=2.0.6',
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench Ansible Client."""
from pathlib import Path
import ansible_runner
from ansible.parsing.dataloader import DataLoader
from ansible.inventory.manager import InventoryManager
from superbench.common.utils import logger
class AnsibleClient():
    """Ansible Client class.

    Builds the keyword arguments handed to ``ansible_runner.run`` from the
    SuperBench Ansible settings and runs shell commands or playbooks with them.
    """
    def __init__(self, config):
        """Initialize.

        Args:
            config (DictConfig): Ansible config object. The optional attributes
                ``host_file``, ``host_username``, ``host_password`` and
                ``private_key`` are read from it; a falsy config keeps the
                localhost defaults.
        """
        self._playbook_path = Path(__file__).parent / 'playbooks'
        self._config = {
            'private_data_dir': None,
            'inventory': None,
            'host_pattern': 'localhost',
            'cmdline': '--forks 128',
        }
        if config:
            inventory_file = getattr(config, 'host_file', None)
            if inventory_file:
                self._config['inventory'] = inventory_file
                self._config['host_pattern'] = 'all'
                inventory = InventoryManager(loader=DataLoader(), sources=inventory_file)
                host_list = inventory.get_groups_dict()['all']
                # One fork per host; keep the default when the inventory is empty.
                if host_list:
                    self._config['cmdline'] = '--forks {}'.format(len(host_list))
            username = getattr(config, 'host_username', None)
            if username:
                self._config['cmdline'] += ' --user {}'.format(username)
            password = getattr(config, 'host_password', None)
            if password:
                # The same secret answers both the password and the passphrase prompt.
                self._config['passwords'] = {
                    'password': password,
                    'passphrase': password,
                }
            key_file = getattr(config, 'private_key', None)
            if key_file:
                self._config['cmdline'] += ' --private-key {}'.format(key_file)
            elif password:
                # Without a key file, let ansible prompt so that 'passwords' is consumed.
                self._config['cmdline'] += ' --ask-pass --ask-become-pass'
        # Never write credentials to the log: mask every value stored under 'passwords'.
        loggable_config = dict(self._config)
        if 'passwords' in loggable_config:
            loggable_config['passwords'] = {key: '******' for key in loggable_config['passwords']}
        logger.info(loggable_config)

    def run(self, ansible_config, sudo=False):    # pragma: no cover
        """Run Ansible runner.

        Args:
            ansible_config (dict): Ansible config dict. It is not modified.
            sudo (bool): Run as sudo or not. Defaults to False.

        Returns:
            int: Ansible return code.
        """
        if sudo:
            logger.info('Run as sudo ...')
            # Copy first so that a config reused by the caller does not
            # accumulate one more '--become' on every call.
            ansible_config = dict(ansible_config)
            ansible_config['cmdline'] = (ansible_config.get('cmdline') or '') + ' --become'
        r = ansible_runner.run(**ansible_config)
        if r.rc == 0:
            logger.info('Run succeed, return code {}.'.format(r.rc))
        else:
            logger.warning('Run failed, return code {}.'.format(r.rc))
        logger.info(r.stats)
        return r.rc

    def get_shell_config(self, cmd):
        """Get ansible config for shell module.

        Args:
            cmd (str): Shell command for config.

        Returns:
            dict: Ansible config dict, the base config plus ``module`` and ``module_args``.
        """
        logger.info('Run {} on remote ...'.format(cmd))
        ansible_config = {
            **self._config,
            'module': 'shell',
            'module_args': cmd,
        }
        return ansible_config

    def get_playbook_config(self, playbook, extravars=None):
        """Get ansible config for playbook.

        Args:
            playbook (str): Playbook file name, resolved inside the bundled ``playbooks`` directory.
            extravars (dict): Extra variables in playbook. Defaults to None.

        Returns:
            dict: Ansible config dict, the base config plus ``extravars`` and ``playbook``.
        """
        logger.info('Run playbook {} ...'.format(playbook))
        ansible_config = {
            **self._config,
            'extravars': extravars,
            'playbook': str(self._playbook_path / playbook),
        }
        return ansible_config
# Play 1: verify that the benchmark container is running on every host.
# max_fail_percentage: 0 aborts the play as soon as any host fails.
- name: Runtime Environment Check
  hosts: all
  gather_facts: false
  max_fail_percentage: 0
  vars:
    container: sb-workspace
  tasks:
    # `docker inspect` prints "true" when the container is running; the outer
    # Jinja expression only escapes the Go template braces.
    - name: Checking container status
      become: yes
      shell: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
      register: result
      # Errors are tolerated here and turned into an explicit failure below.
      ignore_errors: true
    - fail:
        msg: Container {{ container }} is not running.
      when: result is failed or result.stdout != "true"
# Play 2: push the benchmark config and the per-node environment file into
# each host's workspace directory.
- name: Runtime Environment Update
  hosts: all
  gather_facts: true
  vars:
    container: sb-workspace
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    # Sorted hostnames of all hosts; the position in this list is the node rank.
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    # Content of sb.env: node count, this node's rank, and the first node as master.
    sb_env: |
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
      MASTER_ADDR={{ sb_nodes | first }}
      MASTER_PORT=29500
  tasks:
    - name: Updating Config
      become: yes
      copy:
        src: '{{ output_dir }}/sb.config.yaml'
        dest: '{{ workspace }}/sb.config.yaml'
        mode: 0644
    - name: Updating Env Variables
      become: yes
      copy:
        content: '{{ sb_env }}'
        dest: '{{ workspace }}/sb.env'
        mode: 0644
# Collect facts from every host up front; the later plays in this file set
# gather_facts: false but still reference host facts.
- name: Facts Gathering
  hosts: all
  gather_facts: true

# Generate the SSH config and key pair on the control node, under output_dir.
- name: Context Preparation
  hosts: localhost
  # 'local' is the name of the connection plugin that runs tasks on the
  # control node; 'localhost' is a host name, not a connection plugin.
  connection: local
  gather_facts: false
  tasks:
    - name: Generating SSH Config
      template:
        src: ../templates/ssh_config.j2
        dest: '{{ output_dir }}/ssh_config'
        mode: 0640
    - name: Generating SSH Key Pair
      community.crypto.openssh_keypair:
        path: '{{ output_dir }}/id_ed25519'
        type: ed25519
        comment: superbench
        # Keep an existing key pair instead of regenerating it.
        force: no
# Create the workspace on every host, copy the SSH context generated on the
# control node, then pull the image and (re)start the benchmark container.
- name: Remote Deployment
  hosts: all
  # ansible_user_dir below comes from the facts collected by the
  # "Facts Gathering" play at the top of this playbook.
  gather_facts: false
  vars:
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    container: sb-workspace
  tasks:
    - name: Creating Workspace
      file:
        path: '{{ item }}'
        state: directory
        mode: 0755
      with_items:
        - '{{ workspace }}'
        - '{{ workspace }}/.ssh'
    - name: Copying Context
      copy:
        src: '{{ item.src }}'
        dest: '{{ item.dest }}'
        mode: '{{ item.mode }}'
      with_items:
        - src: '{{ output_dir }}/ssh_config'
          dest: '{{ workspace }}/.ssh/config'
          mode: '644'
        - src: '{{ output_dir }}/id_ed25519.pub'
          dest: '{{ workspace }}/.ssh/authorized_keys'
          mode: '644'
        - src: '{{ output_dir }}/id_ed25519'
          dest: '{{ workspace }}/.ssh/key'
          mode: '400'
      become: yes
    # Best-effort login: only attempted when registry credentials were passed
    # in, and a failed login does not stop the deployment.
    - name: Trying to Login Registry
      shell: |
        docker login {{ docker_registry }} --username {{ docker_username | quote }} --password {{ docker_password | quote }}
      become: yes
      when: docker_registry is defined
      ignore_errors: true
      # The command line contains the registry password; keep it out of
      # Ansible's task output and logs.
      no_log: true
    - name: Pulling Container Image
      shell: |
        docker pull {{ docker_image }}
      become: yes
    # Remove any previous container, start a fresh one with the workspace
    # mounted as /root, then move sshd to ssh_port and smoke-test `sb help`.
    - name: Starting Container
      shell: |
        docker rm --force {{ container }} ||: && \
        docker run -itd --name={{ container }} \
          --privileged --net=host --ipc=host --gpus=all \
          -w /root -v {{ workspace }}:/root -v /mnt:/mnt \
          {{ docker_image }} bash && \
        docker exec {{ container }} bash -c \
          "chown -R root:root ~ && \
          sed -i 's/[# ]*Port.*/Port {{ ssh_port }}/g' /etc/ssh/sshd_config && \
          service ssh restart && sb help"
      become: yes
......@@ -3,9 +3,13 @@
"""SuperBench Runner."""
import random
from pathlib import Path
from omegaconf import ListConfig, OmegaConf
from superbench.common.utils import SuperBenchLogger, logger
from superbench.runner.ansible import AnsibleClient
class SuperBenchRunner():
......@@ -23,11 +27,16 @@ def __init__(self, sb_config, docker_config, ansible_config, output_dir):
self._docker_config = docker_config
self._ansible_config = ansible_config
self._output_dir = output_dir
self._ansible_client = AnsibleClient(ansible_config)
self.__set_logger('sb-run.log')
logger.info('Runner uses config: %s.', self._sb_config)
logger.info('Runner writes to: %s.', self._output_dir)
self._sb_benchmarks = self._sb_config.superbench.benchmarks
self._sb_enabled_benchmarks = self.__get_enabled_benchmarks()
logger.info('Runner will run: %s', self._sb_enabled_benchmarks)
def __set_logger(self, filename):
"""Set logger and add file handler.
......@@ -36,6 +45,45 @@ def __set_logger(self, filename):
"""
SuperBenchLogger.add_handler(logger.logger, filename=str(Path(self._output_dir) / filename))
def __get_enabled_benchmarks(self):
"""Get enabled benchmarks list.
Return:
list: List of benchmarks which will be executed.
"""
if self._sb_config.superbench.enable:
if isinstance(self._sb_config.superbench.enable, str):
return [self._sb_config.superbench.enable]
elif isinstance(self._sb_config.superbench.enable, (list, ListConfig)):
return list(self._sb_config.superbench.enable)
return [k for k, v in self._sb_benchmarks.items() if v.enable]
def deploy(self):    # pragma: no cover
    """Deploy SuperBench environment.

    Runs the ``deploy.yaml`` playbook with a randomly chosen SSH port, the
    output directory and the docker image; registry credentials are added
    only when both a username and a password are configured.
    """
    logger.info('Preparing SuperBench environment.')
    docker = self._docker_config
    playbook_vars = dict(
        # Random port in [16384, 32767] for the sshd inside the container.
        ssh_port=random.randint(1 << 14, (1 << 15) - 1),
        output_dir=self._output_dir,
        docker_image=docker.image,
    )
    has_credentials = bool(docker.username) and bool(docker.password)
    if has_credentials:
        playbook_vars['docker_registry'] = docker.registry
        playbook_vars['docker_username'] = docker.username
        playbook_vars['docker_password'] = docker.password
    deploy_config = self._ansible_client.get_playbook_config('deploy.yaml', extravars=playbook_vars)
    self._ansible_client.run(deploy_config)
def check_env(self):    # pragma: no cover
    """Check SuperBench environment.

    Saves the current benchmark config as ``sb.config.yaml`` in the output
    directory, then runs the ``check_env.yaml`` playbook with that directory.
    """
    logger.info('Checking SuperBench environment.')
    config_file = Path(self._output_dir) / 'sb.config.yaml'
    OmegaConf.save(config=self._sb_config, f=str(config_file))
    playbook_config = self._ansible_client.get_playbook_config(
        'check_env.yaml', extravars={'output_dir': self._output_dir}
    )
    self._ansible_client.run(playbook_config)
def run(self):
"""Run the SuperBench benchmarks distributedly.
......
{# Emit one "Host nodeN" entry per host, where N is the zero-based position of the host's ansible_hostname in the sorted list; every entry uses ssh_port and the key at /root/.ssh/key. #}
{% for host in hostvars.values() | map(attribute='ansible_hostname') | sort %}
Host node{{ loop.index0 }}
HostName {{ host }}
Port {{ ssh_port }}
IdentityFile /root/.ssh/key
StrictHostKeyChecking no
{% endfor %}
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench Ansible Client test."""
import os
import unittest
import tempfile
from pathlib import Path
from omegaconf import OmegaConf
from superbench.runner.ansible import AnsibleClient
class AnsibleClientTestCase(unittest.TestCase):
    """A class for ansible client test cases."""
    def setUp(self):
        """Hook method for setting up the test fixture before exercising it."""
        fd, self.host_file = tempfile.mkstemp()
        # os.fdopen takes ownership of fd, so it is closed even if the write fails.
        with os.fdopen(fd, 'w') as host_file:
            # YAML inventory: the five hosts must be nested under all.hosts,
            # otherwise the client would not count them for '--forks 5'.
            host_file.write(
                'all:\n'
                '  hosts:\n'
                '    10.0.0.10:\n'
                '    10.0.0.11:\n'
                '    10.0.0.12:\n'
                '    10.0.0.13:\n'
                '    10.0.0.14:\n'
            )
        self.ansible_client = AnsibleClient(
            OmegaConf.create({
                'host_file': self.host_file,
                'host_username': 'user',
                'host_password': 'pass',
            })
        )

    def tearDown(self):
        """Hook method for deconstructing the test fixture after testing it."""
        Path(self.host_file).unlink()

    def _expected_config(self, **extra):
        """Return the base config expected from the fixture, merged with extra keys.

        Args:
            **extra: Additional key/value pairs expected on top of the base config.

        Returns:
            dict: Expected ansible config dict.
        """
        expected = {
            'private_data_dir': None,
            'inventory': self.host_file,
            'host_pattern': 'all',
            'cmdline': '--forks 5 --user user --ask-pass --ask-become-pass',
            'passwords': {
                'password': 'pass',
                'passphrase': 'pass',
            },
        }
        expected.update(extra)
        return expected

    def test_init_config(self):
        """Test initial config of client."""
        self.assertDictEqual(self.ansible_client._config, self._expected_config())

    def test_get_shell_config(self):
        """Test get_shell_config of client."""
        cmd = 'ls -la'
        self.assertDictEqual(
            self.ansible_client.get_shell_config(cmd),
            self._expected_config(module='shell', module_args=cmd),
        )

    def test_get_playbook_config(self):
        """Test get_playbook_config of client."""
        self.assertDictEqual(
            self.ansible_client.get_playbook_config('play', {'foo': 'bar'}),
            self._expected_config(
                extravars={'foo': 'bar'},
                playbook=str(self.ansible_client._playbook_path / 'play'),
            ),
        )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment