Unverified Commit c05e173b authored by Yifan Xiong's avatar Yifan Xiong Committed by GitHub
Browse files

Runner - Implement ansible client and runner (#69)

Implement ansible client and runner:
* add ansible client
* add deploy and check_env playbooks
parent e977bbc1
...@@ -8,8 +8,12 @@ container: ...@@ -8,8 +8,12 @@ container:
image: python:3.7 image: python:3.7
steps: steps:
- script: |
echo "##vso[task.prependpath]$HOME/.local/bin"
displayName: Export path
- script: | - script: |
python3 -m pip install .[test,torch] python3 -m pip install .[test,torch]
make postinstall
displayName: Install dependencies displayName: Install dependencies
- script: | - script: |
python3 setup.py lint python3 setup.py lint
......
...@@ -12,8 +12,12 @@ container: ...@@ -12,8 +12,12 @@ container:
image: nvcr.io/nvidia/pytorch:20.12-py3 image: nvcr.io/nvidia/pytorch:20.12-py3
steps: steps:
- script: |
echo "##vso[task.prependpath]$HOME/.local/bin"
displayName: Export path
- script: | - script: |
python3 -m pip install .[test,torch] python3 -m pip install .[test,torch]
make postinstall
displayName: Install dependencies displayName: Install dependencies
- script: | - script: |
SB_MICRO_PATH=$PWD/bin make cppbuild SB_MICRO_PATH=$PWD/bin make cppbuild
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
CPPSOURCES := $(shell find $(CURDIR) -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)') CPPSOURCES := $(shell find $(CURDIR) -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)')
.PHONY: cpplint cppformat .PHONY: cpplint cppformat postinstall
cpplint: cpplint:
clang-format --verbose --dry-run --Werror $(CPPSOURCES) clang-format --verbose --dry-run --Werror $(CPPSOURCES)
...@@ -13,3 +13,10 @@ cppformat: ...@@ -13,3 +13,10 @@ cppformat:
cppbuild: cppbuild:
cd ./superbench/benchmarks/ && bash build.sh cd ./superbench/benchmarks/ && bash build.sh
# Install the Ansible collections used by the bundled playbooks
# (ansible.utils for the index_of lookup, community.crypto for openssh_keypair).
# The conditional selects which recipe line exists, based on whether
# `which ansible-galaxy` printed anything.
postinstall:
ifneq ($(shell which ansible-galaxy),)
	ansible-galaxy collection install ansible.utils community.crypto
else
	$(error 'Cannot find ansible-galaxy')
endif
...@@ -104,6 +104,7 @@ __Install SuperBench__ ...@@ -104,6 +104,7 @@ __Install SuperBench__
# install superbench # install superbench
python3 -m pip install . python3 -m pip install .
make postinstall
``` ```
......
...@@ -133,6 +133,8 @@ def run(self): ...@@ -133,6 +133,8 @@ def run(self):
packages=find_packages(exclude=['tests']), packages=find_packages(exclude=['tests']),
python_requires='>=3.6, <4', python_requires='>=3.6, <4',
install_requires=[ install_requires=[
'ansible_base>=2.10.9;os_name=="posix"',
'ansible_runner>=1.4.7',
'colorlog>=4.7.2', 'colorlog>=4.7.2',
'knack>=0.7.2', 'knack>=0.7.2',
'omegaconf>=2.0.6', 'omegaconf>=2.0.6',
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench Ansible Client."""
import shlex
from pathlib import Path

import ansible_runner
from ansible.parsing.dataloader import DataLoader
from ansible.inventory.manager import InventoryManager

from superbench.common.utils import logger
class AnsibleClient():
    """Ansible Client class.

    Builds keyword-argument dicts for ``ansible_runner.run`` from the user's
    Ansible settings and runs shell commands or bundled playbooks with them.
    """
    def __init__(self, config):
        """Initialize.

        Args:
            config (DictConfig): Ansible config object. The optional attributes
                ``host_file``, ``host_username``, ``host_password`` and
                ``private_key`` are read. A falsy config, or one without
                ``host_file``, keeps the localhost defaults.
        """
        self._playbook_path = Path(__file__).parent / 'playbooks'
        self._config = {
            'private_data_dir': None,
            'inventory': None,
            'host_pattern': 'localhost',
            'cmdline': '--forks 128',
        }
        inventory_file = getattr(config, 'host_file', None) if config else None
        if inventory_file:
            self._config['inventory'] = inventory_file
            self._config['host_pattern'] = 'all'
            inventory = InventoryManager(loader=DataLoader(), sources=inventory_file)
            host_list = inventory.get_groups_dict()['all']
            if host_list:
                # One fork per host, replacing the default of 128.
                self._config['cmdline'] = '--forks {}'.format(len(host_list))
            # NOTE(review): the credential options are applied only when a host file
            # is given -- confirm this nesting against the original layout.
            username = getattr(config, 'host_username', None)
            if username:
                # Quoted so a value with spaces or shell metacharacters stays one argument.
                self._config['cmdline'] += ' --user {}'.format(shlex.quote(str(username)))
            password = getattr(config, 'host_password', None)
            if password:
                # The same secret is supplied for both keys.
                # Presumably ansible_runner matches the keys against interactive prompts -- verify.
                self._config['passwords'] = {
                    'password': password,
                    'passphrase': password,
                }
            key_file = getattr(config, 'private_key', None)
            if key_file:
                self._config['cmdline'] += ' --private-key {}'.format(shlex.quote(str(key_file)))
            elif password:
                # No key file: ask Ansible to prompt for the passwords instead.
                self._config['cmdline'] += ' --ask-pass --ask-become-pass'
        # Never write credentials to the log: mask the whole passwords mapping.
        logger.info({key: ('******' if key == 'passwords' else value) for key, value in self._config.items()})

    def run(self, ansible_config, sudo=False):    # pragma: no cover
        """Run Ansible runner.

        Args:
            ansible_config (dict): Ansible config dict, passed to ``ansible_runner.run``
                as keyword arguments. It is not modified.
            sudo (bool): Run as sudo or not. Defaults to False.

        Returns:
            int: Ansible return code.
        """
        if sudo:
            logger.info('Run as sudo ...')
            # Work on a copy so the caller's dict can be reused without accumulating flags.
            ansible_config = dict(ansible_config)
            ansible_config['cmdline'] = '{} --become'.format(ansible_config.get('cmdline') or '').strip()
        r = ansible_runner.run(**ansible_config)
        if r.rc == 0:
            logger.info('Run succeed, return code {}.'.format(r.rc))
        else:
            logger.warning('Run failed, return code {}.'.format(r.rc))
        logger.info(r.stats)
        return r.rc

    def get_shell_config(self, cmd):
        """Get ansible config for shell module.

        Args:
            cmd (str): Shell command for config.

        Returns:
            dict: Ansible config dict: the client defaults plus ``module`` and ``module_args``.
        """
        logger.info('Run {} on remote ...'.format(cmd))
        ansible_config = {
            **self._config,
            'module': 'shell',
            'module_args': cmd,
        }
        return ansible_config

    def get_playbook_config(self, playbook, extravars=None):
        """Get ansible config for playbook.

        Args:
            playbook (str): Playbook file name, resolved against the bundled ``playbooks`` directory.
            extravars (dict): Extra variables in playbook. Defaults to None.

        Returns:
            dict: Ansible config dict: the client defaults plus ``extravars`` and ``playbook``.
        """
        logger.info('Run playbook {} ...'.format(playbook))
        ansible_config = {
            **self._config,
            'extravars': extravars,
            'playbook': str(self._playbook_path / playbook),
        }
        return ansible_config
# Verify on every host that the SuperBench container is running.
- name: Runtime Environment Check
  hosts: all
  gather_facts: false
  max_fail_percentage: 0
  vars:
    container: sb-workspace
  tasks:
    - name: Checking container status
      become: true
      # The inner braces are a Go template for docker; wrapping them in a quoted
      # Jinja literal passes them through untouched.
      shell: >-
        docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
      register: result
      # Do not stop here when inspect fails; the next task reports it.
      ignore_errors: true
    - fail:
        msg: Container {{ container }} is not running.
      when: result is failed or result.stdout != "true"
# Push the benchmark config and the distributed-run environment file to every host.
- name: Runtime Environment Update
  hosts: all
  gather_facts: true
  vars:
    container: sb-workspace
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    # Sorted host names of all hosts known to the run; the position in this
    # list is the node rank and the first entry is the master.
    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
    sb_env: |
      NNODES={{ sb_nodes | length }}
      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
      MASTER_ADDR={{ sb_nodes | first }}
      MASTER_PORT=29500
  tasks:
    - name: Updating Config
      become: true
      copy:
        src: '{{ output_dir }}/sb.config.yaml'
        dest: '{{ workspace }}/sb.config.yaml'
        mode: '0644'
    - name: Updating Env Variables
      become: true
      copy:
        content: '{{ sb_env }}'
        dest: '{{ workspace }}/sb.env'
        mode: '0644'
# Collect facts from every host first: the following plays switch fact gathering
# off but still read ansible_hostname / ansible_user_dir.
- name: Facts Gathering
  hosts: all
  gather_facts: true

# Generate, under output_dir, the files that the deployment play copies to the nodes.
- name: Context Preparation
  hosts: localhost
  # 'local' is the connection plugin; 'localhost' is a host name, not a plugin.
  connection: local
  gather_facts: false
  tasks:
    - name: Generating SSH Config
      template:
        src: ../templates/ssh_config.j2
        dest: '{{ output_dir }}/ssh_config'
        mode: 0640
    - name: Generating SSH Key Pair
      community.crypto.openssh_keypair:
        path: '{{ output_dir }}/id_ed25519'
        type: ed25519
        comment: superbench
        # Do not force regeneration of the key pair.
        force: no
# Prepare the workspace on every node, then pull and start the SuperBench container.
- name: Remote Deployment
  hosts: all
  gather_facts: false
  vars:
    workspace: '{{ ansible_user_dir }}/sb-workspace'
    container: sb-workspace
  tasks:
    - name: Creating Workspace
      file:
        path: '{{ item }}'
        state: directory
        mode: 0755
      with_items:
        - '{{ workspace }}'
        - '{{ workspace }}/.ssh'
    # The workspace is later bind-mounted as /root, so these files end up in the
    # container's /root/.ssh.
    - name: Copying Context
      copy:
        src: '{{ item.src }}'
        dest: '{{ item.dest }}'
        mode: '{{ item.mode }}'
      with_items:
        - src: '{{ output_dir }}/ssh_config'
          dest: '{{ workspace }}/.ssh/config'
          mode: '644'
        - src: '{{ output_dir }}/id_ed25519.pub'
          dest: '{{ workspace }}/.ssh/authorized_keys'
          mode: '644'
        - src: '{{ output_dir }}/id_ed25519'
          dest: '{{ workspace }}/.ssh/key'
          mode: '400'
      become: yes
    # The password is fed through stdin rather than placed on the command line,
    # and no_log keeps the task's arguments and result out of Ansible's output.
    - name: Trying to Login Registry
      shell: |
        docker login {{ docker_registry }} --username {{ docker_username | quote }} --password-stdin
      args:
        stdin: '{{ docker_password }}'
        stdin_add_newline: false
      become: yes
      when: docker_registry is defined
      # Best effort: a failed login does not stop the deployment.
      ignore_errors: true
      no_log: true
    - name: Pulling Container Image
      shell: |
        docker pull {{ docker_image }}
      become: yes
    # Replace any previous container, start a new one with the workspace mounted
    # as /root, then point sshd at ssh_port and restart it inside the container.
    - name: Starting Container
      shell: |
        docker rm --force {{ container }} ||: && \
          docker run -itd --name={{ container }} \
            --privileged --net=host --ipc=host --gpus=all \
            -w /root -v {{ workspace }}:/root -v /mnt:/mnt \
            {{ docker_image }} bash && \
          docker exec {{ container }} bash -c \
            "chown -R root:root ~ && \
            sed -i 's/[# ]*Port.*/Port {{ ssh_port }}/g' /etc/ssh/sshd_config && \
            service ssh restart && sb help"
      become: yes
...@@ -3,9 +3,13 @@ ...@@ -3,9 +3,13 @@
"""SuperBench Runner.""" """SuperBench Runner."""
import random
from pathlib import Path from pathlib import Path
from omegaconf import ListConfig, OmegaConf
from superbench.common.utils import SuperBenchLogger, logger from superbench.common.utils import SuperBenchLogger, logger
from superbench.runner.ansible import AnsibleClient
class SuperBenchRunner(): class SuperBenchRunner():
...@@ -23,11 +27,16 @@ def __init__(self, sb_config, docker_config, ansible_config, output_dir): ...@@ -23,11 +27,16 @@ def __init__(self, sb_config, docker_config, ansible_config, output_dir):
self._docker_config = docker_config self._docker_config = docker_config
self._ansible_config = ansible_config self._ansible_config = ansible_config
self._output_dir = output_dir self._output_dir = output_dir
self._ansible_client = AnsibleClient(ansible_config)
self.__set_logger('sb-run.log') self.__set_logger('sb-run.log')
logger.info('Runner uses config: %s.', self._sb_config) logger.info('Runner uses config: %s.', self._sb_config)
logger.info('Runner writes to: %s.', self._output_dir) logger.info('Runner writes to: %s.', self._output_dir)
self._sb_benchmarks = self._sb_config.superbench.benchmarks
self._sb_enabled_benchmarks = self.__get_enabled_benchmarks()
logger.info('Runner will run: %s', self._sb_enabled_benchmarks)
def __set_logger(self, filename): def __set_logger(self, filename):
"""Set logger and add file handler. """Set logger and add file handler.
...@@ -36,6 +45,45 @@ def __set_logger(self, filename): ...@@ -36,6 +45,45 @@ def __set_logger(self, filename):
""" """
SuperBenchLogger.add_handler(logger.logger, filename=str(Path(self._output_dir) / filename)) SuperBenchLogger.add_handler(logger.logger, filename=str(Path(self._output_dir) / filename))
def __get_enabled_benchmarks(self):
    """Get enabled benchmarks list.

    A truthy ``superbench.enable`` wins when it is a string (single benchmark) or a
    list; otherwise every benchmark whose own ``enable`` flag is truthy is selected.

    Returns:
        list: List of benchmarks which will be executed.
    """
    enabled = self._sb_config.superbench.enable
    if enabled and isinstance(enabled, str):
        return [enabled]
    if enabled and isinstance(enabled, (list, ListConfig)):
        return list(enabled)
    # No usable global switch: fall back to the per-benchmark flags.
    return [name for name, benchmark in self._sb_benchmarks.items() if benchmark.enable]
def deploy(self):    # pragma: no cover
    """Deploy SuperBench environment.

    Runs the ``deploy.yaml`` playbook with a randomly chosen SSH port in
    [16384, 32767], the output directory and the docker image. Registry
    credentials are passed only when both username and password are set.
    """
    logger.info('Preparing SuperBench environment.')
    playbook_vars = dict(
        ssh_port=random.randint(1 << 14, (1 << 15) - 1),
        output_dir=self._output_dir,
        docker_image=self._docker_config.image,
    )
    has_credentials = bool(self._docker_config.username) and bool(self._docker_config.password)
    if has_credentials:
        playbook_vars['docker_registry'] = self._docker_config.registry
        playbook_vars['docker_username'] = self._docker_config.username
        playbook_vars['docker_password'] = self._docker_config.password
    playbook_config = self._ansible_client.get_playbook_config('deploy.yaml', extravars=playbook_vars)
    self._ansible_client.run(playbook_config)
def check_env(self):    # pragma: no cover
    """Check SuperBench environment.

    Saves the current config as ``sb.config.yaml`` in the output directory, then
    runs the ``check_env.yaml`` playbook with that directory as ``output_dir``.
    """
    logger.info('Checking SuperBench environment.')
    config_path = Path(self._output_dir) / 'sb.config.yaml'
    OmegaConf.save(config=self._sb_config, f=str(config_path))
    playbook_config = self._ansible_client.get_playbook_config(
        'check_env.yaml', extravars={'output_dir': self._output_dir}
    )
    self._ansible_client.run(playbook_config)
def run(self): def run(self):
"""Run the SuperBench benchmarks distributedly. """Run the SuperBench benchmarks distributedly.
......
{# One Host entry per node, numbered node0, node1, ... in sorted host-name order. #}
{% set sb_nodes = hostvars.values() | map(attribute='ansible_hostname') | sort %}
{% for host in sb_nodes %}
Host node{{ loop.index0 }}
HostName {{ host }}
Port {{ ssh_port }}
IdentityFile /root/.ssh/key
StrictHostKeyChecking no
{% endfor %}
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench Ansible Client test."""
import os
import unittest
import tempfile
from pathlib import Path
from omegaconf import OmegaConf
from superbench.runner.ansible import AnsibleClient
class AnsibleClientTestCase(unittest.TestCase):
    """A class for ansible client test cases."""
    def setUp(self):
        """Hook method for setting up the test fixture before exercising it.

        Writes a five-host YAML inventory to a temporary file and builds an
        AnsibleClient from it with a user name and password.
        """
        fd, self.host_file = tempfile.mkstemp()
        # Registered right away: cleanups still run when a later line of setUp()
        # raises, whereas tearDown() would be skipped.
        self.addCleanup(Path(self.host_file).unlink)
        # fdopen takes ownership of the descriptor and closes it even if the write fails.
        with os.fdopen(fd, 'w') as host_file:
            host_file.write(
                'all:\n'
                '  hosts:\n'
                '    10.0.0.10:\n'
                '    10.0.0.11:\n'
                '    10.0.0.12:\n'
                '    10.0.0.13:\n'
                '    10.0.0.14:\n'
            )
        self.ansible_client = AnsibleClient(
            OmegaConf.create({
                'host_file': self.host_file,
                'host_username': 'user',
                'host_password': 'pass',
            })
        )
        # Base config every test expects for the fixture above.
        self.expected_config = {
            'private_data_dir': None,
            'inventory': self.host_file,
            'host_pattern': 'all',
            'cmdline': '--forks 5 --user user --ask-pass --ask-become-pass',
            'passwords': {
                'password': 'pass',
                'passphrase': 'pass',
            },
        }

    def test_init_config(self):
        """Test initial config of client."""
        self.assertDictEqual(self.ansible_client._config, self.expected_config)

    def test_get_shell_config(self):
        """Test get_shell_config of client."""
        cmd = 'ls -la'
        self.assertDictEqual(
            self.ansible_client.get_shell_config(cmd), {
                **self.expected_config,
                'module': 'shell',
                'module_args': cmd,
            }
        )

    def test_get_playbook_config(self):
        """Test get_playbook_config of client."""
        self.assertDictEqual(
            self.ansible_client.get_playbook_config('play', {'foo': 'bar'}), {
                **self.expected_config,
                'extravars': {
                    'foo': 'bar',
                },
                'playbook': str(self.ansible_client._playbook_path / 'play'),
            }
        )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment