generate_ts_config.py 10.1 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
3

4
import sys
chicm-ms's avatar
chicm-ms committed
5
import os
6
import glob
7
8
9
import argparse
from utils import get_yml_content, dump_yml_content

chicm-ms's avatar
chicm-ms committed
10
# Relative path of the YAML file that update_training_service_config() always
# reads and rewrites.
TRAINING_SERVICE_FILE = os.path.join('config', 'training_service.yml')
11
# Relative path of the second YAML file, read and rewritten by
# update_training_service_config() only when --ts is 'hybrid'.
TRAINING_SERVICE_FILE_V2 = os.path.join('config', 'training_service_v2.yml')
12
13
14

def update_training_service_config(args):
    """Write command-line overrides into the training service config file(s).

    The file at ``TRAINING_SERVICE_FILE`` is loaded with ``get_yml_content``,
    the section keyed by ``args.ts`` is patched with every option that was
    actually supplied (i.e. is not ``None``), and the result is written back
    with ``dump_yml_content``.  Training services without a dedicated branch
    (e.g. ``local``) only get the optional ``nniManagerIp`` override.

    When ``args.ts`` is ``'hybrid'`` the file at ``TRAINING_SERVICE_FILE_V2``
    is additionally loaded, patched and rewritten.  Unlike the other
    branches, the hybrid fields are assigned unconditionally, so options that
    were not supplied are stored as ``None``.

    Args:
        args: Parsed argument namespace (as produced by the parser in this
            script's ``__main__`` block).  Must expose ``ts``,
            ``config_version``, ``nni_manager_ip`` and the per-service
            options read below.

    Raises:
        KeyError / IndexError / TypeError: if the loaded config lacks the
            section or nested keys that a supplied option is written to
            (nothing is created on the fly except the ADL ``nfs`` mapping).
    """
    config = get_yml_content(TRAINING_SERVICE_FILE)

    # The manager IP is only written here for v1-style configs.
    if args.nni_manager_ip is not None and args.config_version == 'v1':
        config[args.ts]['nniManagerIp'] = args.nni_manager_ip

    if args.ts == 'pai':
        if args.pai_user is not None:
            config[args.ts]['paiConfig']['userName'] = args.pai_user
        if args.pai_host is not None:
            config[args.ts]['paiConfig']['host'] = args.pai_host
        if args.pai_token is not None:
            config[args.ts]['paiConfig']['token'] = args.pai_token
        if args.pai_reuse is not None:
            # Flag arrives as text; only a case-insensitive "true" enables it.
            config[args.ts]['paiConfig']['reuse'] = args.pai_reuse.lower() == 'true'
        if args.nni_docker_image is not None:
            config[args.ts]['trial']['image'] = args.nni_docker_image
        if args.nni_manager_nfs_mount_path is not None:
            config[args.ts]['trial']['nniManagerNFSMountPath'] = args.nni_manager_nfs_mount_path
        if args.container_nfs_mount_path is not None:
            config[args.ts]['trial']['containerNFSMountPath'] = args.container_nfs_mount_path
        if args.pai_storage_config_name is not None:
            config[args.ts]['trial']['paiStorageConfigName'] = args.pai_storage_config_name
        if args.vc is not None:
            config[args.ts]['trial']['virtualCluster'] = args.vc
        if args.debug is not None:
            config[args.ts]['debug'] = args.debug.lower() == 'true'
    elif args.ts == 'kubeflow':
        if args.nfs_server is not None:
            config[args.ts]['kubeflowConfig']['nfs']['server'] = args.nfs_server
        if args.nfs_path is not None:
            config[args.ts]['kubeflowConfig']['nfs']['path'] = args.nfs_path
        if args.keyvault_vaultname is not None:
            config[args.ts]['kubeflowConfig']['keyVault']['vaultName'] = args.keyvault_vaultname
        if args.keyvault_name is not None:
            config[args.ts]['kubeflowConfig']['keyVault']['name'] = args.keyvault_name
        if args.azs_account is not None:
            config[args.ts]['kubeflowConfig']['azureStorage']['accountName'] = args.azs_account
        if args.azs_share is not None:
            config[args.ts]['kubeflowConfig']['azureStorage']['azureShare'] = args.azs_share
        if args.nni_docker_image is not None:
            config[args.ts]['trial']['worker']['image'] = args.nni_docker_image
    elif args.ts == 'frameworkcontroller':
        if args.nfs_server is not None:
            config[args.ts]['frameworkcontrollerConfig']['nfs']['server'] = args.nfs_server
        if args.nfs_path is not None:
            config[args.ts]['frameworkcontrollerConfig']['nfs']['path'] = args.nfs_path
        if args.keyvault_vaultname is not None:
            config[args.ts]['frameworkcontrollerConfig']['keyVault']['vaultName'] = args.keyvault_vaultname
        if args.keyvault_name is not None:
            config[args.ts]['frameworkcontrollerConfig']['keyVault']['name'] = args.keyvault_name
        if args.azs_account is not None:
            config[args.ts]['frameworkcontrollerConfig']['azureStorage']['accountName'] = args.azs_account
        if args.azs_share is not None:
            config[args.ts]['frameworkcontrollerConfig']['azureStorage']['azureShare'] = args.azs_share
        if args.nni_docker_image is not None:
            # Only the first task role's image is overridden.
            config[args.ts]['trial']['taskRoles'][0]['image'] = args.nni_docker_image
    elif args.ts == 'remote':
        # Only the first entry of machineList is patched.
        if args.remote_user is not None:
            config[args.ts]['machineList'][0]['username'] = args.remote_user
        if args.remote_host is not None:
            config[args.ts]['machineList'][0]['ip'] = args.remote_host
        if args.remote_port is not None:
            config[args.ts]['machineList'][0]['port'] = args.remote_port
        if args.remote_pwd is not None:
            config[args.ts]['machineList'][0]['passwd'] = args.remote_pwd
        if args.remote_reuse is not None:
            config[args.ts]['remoteConfig']['reuse'] = args.remote_reuse.lower() == 'true'
        if args.azurestoragetoken is not None:
            config[args.ts]['sharedStorage']['storageAccountKey'] = args.azurestoragetoken
        if args.nfs_server is not None:
            config[args.ts]['sharedStorage']['nfsServer'] = args.nfs_server
        if args.local_mount_point is not None:
            config[args.ts]['sharedStorage']['localMountPoint'] = args.local_mount_point
        if args.remote_mount_point is not None:
            config[args.ts]['sharedStorage']['remoteMountPoint'] = args.remote_mount_point
        if args.exported_directory is not None:
            config[args.ts]['sharedStorage']['exportedDirectory'] = args.exported_directory
    elif args.ts == 'adl':
        if args.nni_docker_image is not None:
            config[args.ts]['trial']['image'] = args.nni_docker_image
        if args.checkpoint_storage_class is not None:
            config[args.ts]['trial']['checkpoint']['storageClass'] = args.checkpoint_storage_class
        if args.checkpoint_storage_size is not None:
            config[args.ts]['trial']['checkpoint']['storageSize'] = args.checkpoint_storage_size
        if args.adaptive is not None:
            config[args.ts]['trial']['adaptive'] = args.adaptive
        # The NFS settings are all-or-nothing: they are written only when the
        # server, path and container mount path are all supplied.
        if args.adl_nfs_server is not None and args.adl_nfs_path is not None and args.adl_nfs_container_mount_path is not None:
            # default keys in nfs is empty, need to initialize
            config[args.ts]['trial']['nfs'] = {}
            config[args.ts]['trial']['nfs']['server'] = args.adl_nfs_server
            config[args.ts]['trial']['nfs']['path'] = args.adl_nfs_path
            # Must read the same attribute the guard above checks; the parser
            # defines --adl_nfs_container_mount_path only.
            config[args.ts]['trial']['nfs']['container_mount_path'] = args.adl_nfs_container_mount_path
    elif args.ts == 'aml':
        if args.nni_docker_image is not None:
            config[args.ts]['trial']['image'] = args.nni_docker_image
        if args.subscription_id is not None:
            config[args.ts]['amlConfig']['subscriptionId'] = args.subscription_id
        if args.resource_group is not None:
            config[args.ts]['amlConfig']['resourceGroup'] = args.resource_group
        if args.workspace_name is not None:
            config[args.ts]['amlConfig']['workspaceName'] = args.workspace_name
        if args.compute_target is not None:
            config[args.ts]['amlConfig']['computeTarget'] = args.compute_target

    # The first config file is rewritten for every training service,
    # including 'hybrid'.
    dump_yml_content(TRAINING_SERVICE_FILE, config)

    if args.ts == 'hybrid':
        # Hybrid settings live in the second config file.  Values are assigned
        # unconditionally, so unset options end up as None in the output.
        config = get_yml_content(TRAINING_SERVICE_FILE_V2)
        # trainingService[0]: machine credentials (first machineList entry).
        config[args.ts]['trainingService'][0]['machineList'][0]['user'] = args.remote_user
        config[args.ts]['trainingService'][0]['machineList'][0]['host'] = args.remote_host
        config[args.ts]['trainingService'][0]['machineList'][0]['password'] = args.remote_pwd
        config[args.ts]['trainingService'][0]['machineList'][0]['port'] = args.remote_port
        # trainingService[2]: subscription / workspace / compute settings.
        config[args.ts]['trainingService'][2]['subscriptionId'] = args.subscription_id
        config[args.ts]['trainingService'][2]['resourceGroup'] = args.resource_group
        config[args.ts]['trainingService'][2]['workspaceName'] = args.workspace_name
        config[args.ts]['trainingService'][2]['computeTarget'] = args.compute_target
        config[args.ts]['nni_manager_ip'] = args.nni_manager_ip
        dump_yml_content(TRAINING_SERVICE_FILE_V2, config)

130

131
132
if __name__ == '__main__':
    # Script entry point: declare every supported option, parse the command
    # line, and hand the resulting namespace to the config updater.
    supported_services = ['pai', 'kubeflow', 'remote', 'local', 'frameworkcontroller', 'adl', 'aml', 'hybrid']

    # Plain value options, in declaration order, paired with their argparse
    # type.  None of them has an explicit default, so an option that is not
    # given on the command line parses to None.
    value_options = (
        ("--nni_docker_image", str),
        ("--nni_manager_ip", str),
        # args for remote with shared storage
        ("--azurestoragetoken", str),
        ("--nfs_server", str),
        ("--local_mount_point", str),
        ("--remote_mount_point", str),
        ("--exported_directory", str),
        # args for PAI
        ("--pai_user", str),
        ("--pai_pwd", str),
        ("--pai_host", str),
        ("--data_dir", str),
        ("--output_dir", str),
        ("--vc", str),
        ("--pai_token", str),
        ("--pai_reuse", str),
        ("--pai_storage_config_name", str),
        ("--nni_manager_nfs_mount_path", str),
        ("--container_nfs_mount_path", str),
        ("--debug", str),
        # args for kubeflow and frameworkController
        ("--nfs_path", str),
        ("--keyvault_vaultname", str),
        ("--keyvault_name", str),
        ("--azs_account", str),
        ("--azs_share", str),
        # args for remote
        ("--remote_user", str),
        ("--remote_pwd", str),
        ("--remote_host", str),
        ("--remote_port", int),
        ("--remote_reuse", str),
        # args for adl
        ("--checkpoint_storage_class", str),
        ("--checkpoint_storage_size", str),
        ("--adaptive", str),
        ("--adl_nfs_server", str),
        ("--adl_nfs_path", str),
        ("--adl_nfs_container_mount_path", str),
        # args for aml
        ("--subscription_id", str),
        ("--resource_group", str),
        ("--workspace_name", str),
        ("--compute_target", str),
    )

    parser = argparse.ArgumentParser()
    parser.add_argument("--ts", type=str, choices=supported_services, default='pai')
    parser.add_argument("--config_version", type=str, choices=['v1', 'v2'], default='v1')
    for option_name, option_type in value_options:
        parser.add_argument(option_name, type=option_type)

    update_training_service_config(parser.parse_args())