frameworkcontroller.py 1.73 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Configuration for FrameworkController training service.

Check the reference_ for an explanation of each field.

You may also want to check `FrameworkController training service doc`_.

.. _reference: https://nni.readthedocs.io/en/stable/reference/experiment_config.html

.. _FrameworkController training service doc: https://nni.readthedocs.io/en/stable/TrainingService/FrameworkControllerMode.html

"""

__all__ = ['FrameworkControllerConfig', 'FrameworkControllerRoleConfig', 'FrameworkAttemptCompletionPolicy']

from dataclasses import dataclass
from typing import List, Optional, Union

from ..base import ConfigBase
from ..training_service import TrainingServiceConfig
from .k8s_storage import K8sStorageConfig

@dataclass(init=False)
class FrameworkAttemptCompletionPolicy(ConfigBase):
    """Task-count thresholds of a FrameworkController attempt completion policy.

    Used as the type of ``FrameworkControllerRoleConfig.framework_attempt_completion_policy``.
    Neither field declares a default value.
    """
    # NOTE(review): presumably the failed-task count at which the attempt is
    # treated as failed -- confirm against the FrameworkController docs.
    min_failed_task_count: int
    # NOTE(review): presumably the succeeded-task count at which the attempt is
    # treated as succeeded -- confirm against the FrameworkController docs.
    min_succeed_task_count: int

# init=False: no __init__ is generated by @dataclass. This is what allows
# fields without defaults to follow `docker_image` (which has one); with a
# generated __init__ that ordering would raise TypeError at class creation.
@dataclass(init=False)
class FrameworkControllerRoleConfig(ConfigBase):
    """Configuration of a single task role in a FrameworkController experiment.

    Instances are collected in ``FrameworkControllerConfig.task_roles``.
    """
    # Name of the task role.
    name: str
    # Docker image for the role; defaults to the 'msranni/nni:latest' image.
    docker_image: str = 'msranni/nni:latest'
    # NOTE(review): presumably the number of task instances of this role -- confirm.
    task_number: int
    # Command string for the role.
    command: str
    # NOTE(review): presumably per-task GPU count -- confirm.
    gpu_number: int
    # NOTE(review): presumably per-task CPU count -- confirm.
    cpu_number: int
    # Accepts either a string or an integer.
    # NOTE(review): units / string format are not visible here -- check the reference docs.
    memory_size: Union[str, int]
    # Completion thresholds for this role (see FrameworkAttemptCompletionPolicy).
    framework_attempt_completion_policy: FrameworkAttemptCompletionPolicy

# init=False: no __init__ is generated by @dataclass, so fields without
# defaults may follow `platform`, which has one.
@dataclass(init=False)
class FrameworkControllerConfig(TrainingServiceConfig):
    """Training service configuration for the FrameworkController platform."""
    # Platform identifier; defaults to 'frameworkcontroller'.
    platform: str = 'frameworkcontroller'
    # Kubernetes storage configuration (see K8sStorageConfig).
    storage: K8sStorageConfig
    # Optional service account name; may be None.
    service_account_name: Optional[str]
    # One entry per task role.
    task_roles: List[FrameworkControllerRoleConfig]
    # Defaults to True.
    # NOTE(review): the exact effect of reuse mode is not visible in this file.
    reuse_mode: Optional[bool] = True
49
50
51
52
53
54

    def _canonicalize(self, parents):
        """Run the parent canonicalization, then default ``trial_command`` to ``''``.

        ``parents`` is forwarded unchanged to the superclass implementation.
        """
        super()._canonicalize(parents)
        if self.trial_command is not None:
            return
        # FrameworkController does not need this field; an empty string is
        # stored only so that the type check passes.
        self.trial_command = ''