Runner - Implement ansible client and runner (#69)

Implement ansible client and runner: * add ansible client * add deploy and check_env playbooks

Runner - Implement ansible client and runner (#69)
Implement ansible client and runner: * add ansible client * add deploy and check_env playbooks
c05e173b · Yifan Xiong · GitHub · e977bbc1 · c05e173b · c05e173b
Unverified Commit c05e173b authored May 23, 2021 by Yifan Xiong Committed by GitHub May 23, 2021
11 changed files
--- a/.azure-pipelines/cpu-unit-test.yml
+++ b/.azure-pipelines/cpu-unit-test.yml
@@ -8,8 +8,12 @@ container:
  image: python:3.7
 steps:
+  - script: |
+      echo "##vso[task.prependpath]$HOME/.local/bin"
+    displayName: Export path
  - script: |
      python3 -m pip install .[test,torch]
+      make postinstall
    displayName: Install dependencies
  - script: |
      python3 setup.py lint

--- a/.azure-pipelines/cuda-unit-test.yml
+++ b/.azure-pipelines/cuda-unit-test.yml
@@ -12,8 +12,12 @@ container:
  image: nvcr.io/nvidia/pytorch:20.12-py3
 steps:
+  - script: |
+      echo "##vso[task.prependpath]$HOME/.local/bin"
+    displayName: Export path
  - script: |
      python3 -m pip install .[test,torch]
+      make postinstall
    displayName: Install dependencies
  - script: |
      SB_MICRO_PATH=$PWD/bin make cppbuild

--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 CPPSOURCES := $(shell find $(CURDIR) -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)')
-.PHONY: cpplint cppformat
+.PHONY: cpplint cppformat postinstall
 cpplint:
 	clang-format --verbose --dry-run --Werror $(CPPSOURCES)
@@ -13,3 +13,10 @@ cppformat:
 cppbuild:
 	cd ./superbench/benchmarks/ && bash build.sh
+# Install the Ansible collections named below (ansible.utils, community.crypto).
+# Requires ansible-galaxy to be on PATH; otherwise the target errors out.
+postinstall:
+ifeq ($(shell which ansible-galaxy),)
+	$(error 'Cannot find ansible-galaxy')
+else
+	ansible-galaxy collection install ansible.utils community.crypto
+endif
--- a/README.md
+++ b/README.md
@@ -104,6 +104,7 @@ __Install SuperBench__
    # install superbench
    python3 -m pip install .
+    make postinstall
    ```

--- a/setup.py
+++ b/setup.py
@@ -133,6 +133,8 @@ setup(
    packages=find_packages(exclude=['tests']),
    python_requires='>=3.6, <4',
    install_requires=[
+        'ansible_base>=2.10.9;os_name=="posix"',
+        'ansible_runner>=1.4.7',
        'colorlog>=4.7.2',
        'knack>=0.7.2',
        'omegaconf>=2.0.6',

--- a/superbench/runner/ansible.py
+++ b/superbench/runner/ansible.py
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""SuperBench Ansible Client."""
+from pathlib import Path
+import ansible_runner
+from ansible.parsing.dataloader import DataLoader
+from ansible.inventory.manager import InventoryManager
+from superbench.common.utils import logger
+class AnsibleClient():
+    """Ansible Client class.
+    Builds the keyword arguments passed to ``ansible_runner.run`` from the
+    SuperBench ansible config, for either a shell command or a playbook
+    located in the ``playbooks`` directory next to this module.
+    """
+    def __init__(self, config):
+        """Initialize.
+        Args:
+            config (DictConfig): Ansible config object. The attributes read are
+                host_file, host_username, host_password and private_key;
+                each one is optional.
+        """
+        self._playbook_path = Path(__file__).parent / 'playbooks'
+        # Defaults used when no host file is configured: target localhost only.
+        self._config = {
+            'private_data_dir': None,
+            'inventory': None,
+            'host_pattern': 'localhost',
+            'cmdline': '--forks 128',
+        }
+        if config:
+            inventory_file = getattr(config, 'host_file', None)
+            if inventory_file:
+                self._config['inventory'] = inventory_file
+                self._config['host_pattern'] = 'all'
+                inventory = InventoryManager(loader=DataLoader(), sources=inventory_file)
+                host_list = inventory.get_groups_dict()['all']
+                # Use one fork per inventory host instead of the default 128.
+                if host_list:
+                    self._config['cmdline'] = '--forks {}'.format(len(host_list))
+            username = getattr(config, 'host_username', None)
+            if username:
+                self._config['cmdline'] += ' --user {}'.format(username)
+            password = getattr(config, 'host_password', None)
+            if password:
+                # NOTE(review): presumably ansible_runner matches these keys against
+                # interactive prompts and answers with the value - confirm.
+                self._config['passwords'] = {
+                    'password': password,
+                    'passphrase': password,
+                }
+            key_file = getattr(config, 'private_key', None)
+            if key_file:
+                self._config['cmdline'] += ' --private-key {}'.format(key_file)
+            elif password:
+                # Without a key file, let ansible prompt for the passwords.
+                self._config['cmdline'] += ' --ask-pass --ask-become-pass'
+        # Never write credentials to the log: mask the passwords entry.
+        loggable_config = {key: '***' if key == 'passwords' else value for key, value in self._config.items()}
+        logger.info(loggable_config)
+    def run(self, ansible_config, sudo=False):    # pragma: no cover
+        """Run Ansible runner.
+        Args:
+            ansible_config (dict): Ansible config dict. It is not modified.
+            sudo (bool): Run as sudo or not. Defaults to False.
+        Returns:
+            int: Ansible return code.
+        """
+        if sudo:
+            logger.info('Run as sudo ...')
+            # Work on a copy so the caller's dict stays untouched and repeated
+            # calls with the same dict do not accumulate --become flags.
+            ansible_config = {
+                **ansible_config,
+                'cmdline': '{} --become'.format(ansible_config.get('cmdline', '')).strip(),
+            }
+        r = ansible_runner.run(**ansible_config)
+        if r.rc == 0:
+            logger.info('Run succeed, return code {}.'.format(r.rc))
+        else:
+            logger.warning('Run failed, return code {}.'.format(r.rc))
+        logger.info(r.stats)
+        return r.rc
+    def get_shell_config(self, cmd):
+        """Get ansible config for shell module.
+        Args:
+            cmd (str): Shell command for config.
+        Returns:
+            dict: Ansible config dict, a new dict based on the client config.
+        """
+        logger.info('Run {} on remote ...'.format(cmd))
+        ansible_config = {
+            **self._config,
+            'module': 'shell',
+            'module_args': cmd,
+        }
+        return ansible_config
+    def get_playbook_config(self, playbook, extravars=None):
+        """Get ansible config for playbook.
+        Args:
+            playbook (str): Playbook file name, resolved against the playbooks directory.
+            extravars (dict): Extra variables in playbook. Defaults to None.
+        Returns:
+            dict: Ansible config dict, a new dict based on the client config.
+        """
+        logger.info('Run playbook {} ...'.format(playbook))
+        ansible_config = {
+            **self._config,
+            'extravars': extravars,
+            'playbook': str(self._playbook_path / playbook),
+        }
+        return ansible_config
--- a/superbench/runner/playbooks/check_env.yaml
+++ b/superbench/runner/playbooks/check_env.yaml
+# Play: verify on every host that the SuperBench container is running.
+- name: Runtime Environment Check
+  hosts: all
+  gather_facts: false
+  # Zero tolerance: a failure on any host aborts the play.
+  max_fail_percentage: 0
+  vars:
+    container: sb-workspace
+  tasks:
+    - name: Checking container status
+      # The quoted Jinja literal emits a raw {{.State.Running}} Go template for docker.
+      shell: docker inspect --format={{ '{{.State.Running}}' }} {{ container }}
+      register: result
+      # Errors are ignored here so the next task can fail with a readable message.
+      ignore_errors: true
+      become: yes
+    - fail:
+        msg: Container {{ container }} is not running.
+      when: result is failed or result.stdout != "true"
+# Play: copy the SuperBench config and a per-node env file into each workspace.
+# Expects output_dir to be supplied as an extra variable.
+- name: Runtime Environment Update
+  hosts: all
+  gather_facts: true
+  vars:
+    workspace: '{{ ansible_user_dir }}/sb-workspace'
+    container: sb-workspace
+    # Sorted hostnames of all hosts in hostvars; list position gives the node rank.
+    sb_nodes: '{{ hostvars.values() | map(attribute="ansible_hostname") | sort }}'
+    # Content of sb.env: node count, this host's index in sb_nodes, and the
+    # first sorted hostname as master address.
+    sb_env: |
+      NNODES={{ sb_nodes | length }}
+      NODE_RANK={{ lookup('ansible.utils.index_of', sb_nodes, 'eq', ansible_hostname) }}
+      MASTER_ADDR={{ sb_nodes | first }}
+      MASTER_PORT=29500
+  tasks:
+    - name: Updating Config
+      copy:
+        src: '{{ output_dir }}/sb.config.yaml'
+        dest: '{{ workspace }}/sb.config.yaml'
+        mode: 0644
+      become: yes
+    - name: Updating Env Variables
+      copy:
+        content: '{{ sb_env }}'
+        dest: '{{ workspace }}/sb.env'
+        mode: 0644
+      become: yes
--- a/superbench/runner/playbooks/deploy.yaml
+++ b/superbench/runner/playbooks/deploy.yaml
+# Play: gather facts for every host up front; the later plays in this file
+# run with gather_facts: false and reference facts such as ansible_user_dir.
+- name: Facts Gathering
+  hosts: all
+  gather_facts: true
+# Play: on the control machine, render the SSH config and create the key pair
+# under output_dir. Expects output_dir to be supplied as an extra variable.
+- name: Context Preparation
+  hosts: localhost
+  # "local" is the connection plugin for running on the control machine;
+  # "localhost" is a host name, not a connection plugin.
+  connection: local
+  gather_facts: false
+  tasks:
+    - name: Generating SSH Config
+      template:
+        src: ../templates/ssh_config.j2
+        dest: '{{ output_dir }}/ssh_config'
+        mode: 0640
+    - name: Generating SSH Key Pair
+      community.crypto.openssh_keypair:
+        path: '{{ output_dir }}/id_ed25519'
+        type: ed25519
+        comment: superbench
+        # Keep an existing key pair instead of regenerating it on every run.
+        force: no
+# Play: on every host, create the workspace, copy the SSH context, pull the
+# image and (re)start the SuperBench container. Expects output_dir,
+# docker_image and ssh_port as extra variables; docker_registry,
+# docker_username and docker_password are optional.
+- name: Remote Deployment
+  hosts: all
+  gather_facts: false
+  vars:
+    workspace: '{{ ansible_user_dir }}/sb-workspace'
+    container: sb-workspace
+  tasks:
+    - name: Creating Workspace
+      file:
+        path: '{{ item }}'
+        state: directory
+        mode: 0755
+      with_items:
+        - '{{ workspace }}'
+        - '{{ workspace }}/.ssh'
+    - name: Copying Context
+      copy:
+        src: '{{ item.src }}'
+        dest: '{{ item.dest }}'
+        mode: '{{ item.mode }}'
+      with_items:
+        - src: '{{ output_dir }}/ssh_config'
+          dest: '{{ workspace }}/.ssh/config'
+          mode: '644'
+        - src: '{{ output_dir }}/id_ed25519.pub'
+          dest: '{{ workspace }}/.ssh/authorized_keys'
+          mode: '644'
+        - src: '{{ output_dir }}/id_ed25519'
+          dest: '{{ workspace }}/.ssh/key'
+          mode: '400'
+      become: yes
+    - name: Trying to Login Registry
+      # Feed the password through stdin so it never appears on the command
+      # line, and keep the task out of the Ansible log output.
+      shell: |
+        docker login {{ docker_registry }} --username {{ docker_username }} --password-stdin
+      args:
+        stdin: '{{ docker_password }}'
+      become: yes
+      no_log: true
+      when: docker_registry is defined
+      # Best effort: a failed login does not stop the deployment.
+      ignore_errors: true
+    - name: Pulling Container Image
+      shell: |
+        docker pull {{ docker_image }}
+      become: yes
+    - name: Starting Container
+      # Remove any previous container (failure ignored via "||:"), start a new
+      # one with the workspace mounted as /root, then set the sshd port inside
+      # the container to ssh_port, restart ssh and run "sb help".
+      shell: |
+        docker rm --force {{ container }} ||: && \
+        docker run -itd --name={{ container }} \
+          --privileged --net=host --ipc=host --gpus=all \
+          -w /root -v {{ workspace }}:/root -v /mnt:/mnt \
+          {{ docker_image }} bash && \
+        docker exec {{ container }} bash -c \
+          "chown -R root:root ~ && \
+          sed -i 's/[# ]*Port.*/Port {{ ssh_port }}/g' /etc/ssh/sshd_config && \
+          service ssh restart && sb help"
+      become: yes
--- a/superbench/runner/runner.py
+++ b/superbench/runner/runner.py
@@ -3,9 +3,13 @@
 """SuperBench Runner."""
+import random
 from pathlib import Path
+from omegaconf import ListConfig, OmegaConf
 from superbench.common.utils import SuperBenchLogger, logger
+from superbench.runner.ansible import AnsibleClient
 class SuperBenchRunner():
@@ -23,11 +27,16 @@ class SuperBenchRunner():
        self._docker_config = docker_config
        self._ansible_config = ansible_config
        self._output_dir = output_dir
+        self._ansible_client = AnsibleClient(ansible_config)
        self.__set_logger('sb-run.log')
        logger.info('Runner uses config: %s.', self._sb_config)
        logger.info('Runner writes to: %s.', self._output_dir)
+        self._sb_benchmarks = self._sb_config.superbench.benchmarks
+        self._sb_enabled_benchmarks = self.__get_enabled_benchmarks()
+        logger.info('Runner will run: %s', self._sb_enabled_benchmarks)
    def __set_logger(self, filename):
        """Set logger and add file handler.
@@ -36,6 +45,45 @@ class SuperBenchRunner():
        """
        SuperBenchLogger.add_handler(logger.logger, filename=str(Path(self._output_dir) / filename))
+    def __get_enabled_benchmarks(self):
+        """Resolve which benchmarks should be executed.
+        A non-empty ``superbench.enable`` (string or list) takes precedence;
+        otherwise every benchmark whose own ``enable`` is truthy is selected.
+        Return:
+            list: Names of the benchmarks which will be executed.
+        """
+        enabled = self._sb_config.superbench.enable
+        if enabled and isinstance(enabled, str):
+            return [enabled]
+        if enabled and isinstance(enabled, (list, ListConfig)):
+            return list(enabled)
+        return [name for name, benchmark in self._sb_benchmarks.items() if benchmark.enable]
+    def deploy(self):    # pragma: no cover
+        """Run the deploy playbook to set up the SuperBench environment.
+        Passes a random SSH port, the output directory and the docker image
+        as extra variables; registry credentials are added only when both
+        username and password are set.
+        """
+        logger.info('Preparing SuperBench environment.')
+        docker = self._docker_config
+        extravars = dict(
+            ssh_port=random.randint(1 << 14, (1 << 15) - 1),
+            output_dir=self._output_dir,
+            docker_image=docker.image,
+        )
+        if docker.username and docker.password:
+            extravars['docker_registry'] = docker.registry
+            extravars['docker_username'] = docker.username
+            extravars['docker_password'] = docker.password
+        playbook_config = self._ansible_client.get_playbook_config('deploy.yaml', extravars=extravars)
+        self._ansible_client.run(playbook_config)
+    def check_env(self):    # pragma: no cover
+        """Save the config to the output directory and run the check_env playbook."""
+        logger.info('Checking SuperBench environment.')
+        config_file = Path(self._output_dir) / 'sb.config.yaml'
+        OmegaConf.save(config=self._sb_config, f=str(config_file))
+        playbook_config = self._ansible_client.get_playbook_config(
+            'check_env.yaml', extravars={'output_dir': self._output_dir}
+        )
+        self._ansible_client.run(playbook_config)
    def run(self):
        """Run the SuperBench benchmarks distributedly.

--- a/superbench/runner/templates/ssh_config.j2
+++ b/superbench/runner/templates/ssh_config.j2
+{# One Host entry per host in hostvars, aliased node0, node1, ... in sorted hostname order; expects ssh_port to be defined. -#}
+{% for host in hostvars.values() | map(attribute='ansible_hostname') | sort %}
+Host node{{ loop.index0 }}
+    HostName {{ host }}
+    Port {{ ssh_port }}
+    IdentityFile /root/.ssh/key
+    StrictHostKeyChecking no
+{% endfor %}
--- a/tests/runner/test_ansible.py
+++ b/tests/runner/test_ansible.py
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+"""SuperBench Ansible Client test."""
+import os
+import unittest
+import tempfile
+from pathlib import Path
+from omegaconf import OmegaConf
+from superbench.runner.ansible import AnsibleClient
+class AnsibleClientTestCase(unittest.TestCase):
+    """Unit tests for the config dicts produced by AnsibleClient."""
+    def setUp(self):
+        """Write a five-host inventory to a temp file and build a client from it."""
+        inventory_lines = ['all:', '  hosts:'] + ['    10.0.0.{}:'.format(octet) for octet in range(10, 15)]
+        fd, self.host_file = tempfile.mkstemp()
+        os.write(fd, ('\n'.join(inventory_lines) + '\n').encode())
+        os.close(fd)
+        client_config = OmegaConf.create(
+            {
+                'host_file': self.host_file,
+                'host_username': 'user',
+                'host_password': 'pass',
+            }
+        )
+        self.ansible_client = AnsibleClient(client_config)
+    def tearDown(self):
+        """Remove the temporary inventory file."""
+        Path(self.host_file).unlink()
+    def _expected_base_config(self):
+        """Return a fresh copy of the config expected for the fixture client."""
+        return {
+            'private_data_dir': None,
+            'inventory': self.host_file,
+            'host_pattern': 'all',
+            'cmdline': '--forks 5 --user user --ask-pass --ask-become-pass',
+            'passwords': {
+                'password': 'pass',
+                'passphrase': 'pass',
+            },
+        }
+    def test_init_config(self):
+        """Test initial config of client."""
+        self.assertDictEqual(self.ansible_client._config, self._expected_base_config())
+    def test_get_shell_config(self):
+        """Test get_shell_config of client."""
+        cmd = 'ls -la'
+        expected = self._expected_base_config()
+        expected['module'] = 'shell'
+        expected['module_args'] = cmd
+        self.assertDictEqual(self.ansible_client.get_shell_config(cmd), expected)
+    def test_get_playbook_config(self):
+        """Test get_playbook_config of client."""
+        expected = self._expected_base_config()
+        expected['extravars'] = {'foo': 'bar'}
+        expected['playbook'] = str(self.ansible_client._playbook_path / 'play')
+        self.assertDictEqual(self.ansible_client.get_playbook_config('play', {'foo': 'bar'}), expected)