"vscode:/vscode.git/clone" did not exist on "383bbd54bc621086e05aa1b030d8d4d5635b25e6"
config_schema.py 8.94 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
from schema import Schema, And, Use, Optional, Regex, Or

24
# Top-level experiment settings merged into every platform schema below.
# Plain string keys are required; keys wrapped in Optional() may be omitted.
common_schema = {
    'authorName': str,
    'experimentName': str,
    Optional('description'): str,
    'trialConcurrency': And(int, lambda n: 1 <= n <= 999999),
    # A positive integer followed by exactly one unit letter (s, m, h or d).
    # The class is written [smhd]: inside a character class '|' is a literal,
    # so the former [s|m|h|d] also accepted values such as "10|".
    Optional('maxExecDuration'): Regex(r'^[1-9][0-9]*[smhd]$'),
    Optional('maxTrialNum'): And(int, lambda x: 1 <= x <= 99999),
    'trainingServicePlatform': And(str, lambda x: x in ['remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller']),
    # Path-valued entries are validated by checking the local filesystem.
    Optional('searchSpacePath'): os.path.exists,
    Optional('multiPhase'): bool,
    Optional('multiThread'): bool,
    Optional('nniManagerIp'): str,
    Optional('logDir'): os.path.isdir,
    Optional('debug'): bool,
    Optional('logLevel'): Or('trace', 'debug', 'info', 'warning', 'error', 'fatal'),
    Optional('logCollection'): Or('http', 'none'),
    'useAnnotation': bool,
    # An advisor is either the builtin Hyperband or a user-supplied class.
    Optional('advisor'): Or({
        'builtinAdvisorName': Or('Hyperband'),
        'classArgs': {
            'optimize_mode': Or('maximize', 'minimize'),
            Optional('R'): int,
            Optional('eta'): int
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'codeDir': os.path.exists,
        'classFileName': str,
        'className': str,
        Optional('classArgs'): dict,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }),
    # A tuner is one of the builtin tuners (grouped by the classArgs they
    # accept) or a user-supplied class located by codeDir/classFileName.
    Optional('tuner'): Or({
        'builtinTunerName': Or('TPE', 'Random', 'Anneal', 'SMAC', 'Evolution'),
        Optional('classArgs'): {
            'optimize_mode': Or('maximize', 'minimize')
        },
        Optional('includeIntermediateResults'): bool,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'builtinTunerName': Or('BatchTuner', 'GridSearch'),
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'builtinTunerName': 'NetworkMorphism',
        'classArgs': {
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('task'): And(str, lambda x: x in ['cv', 'nlp', 'common']),
            Optional('input_width'): int,
            Optional('input_channel'): int,
            Optional('n_output_node'): int,
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'builtinTunerName': 'MetisTuner',
        'classArgs': {
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('no_resampling'): bool,
            Optional('no_candidates'): bool,
            Optional('selection_num_starting_points'): int,
            Optional('cold_start_num'): int,
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'codeDir': os.path.exists,
        'classFileName': str,
        'className': str,
        Optional('classArgs'): dict,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }),
    # An assessor is Medianstop, Curvefitting, or a user-supplied class.
    Optional('assessor'): Or({
        'builtinAssessorName': lambda x: x in ['Medianstop'],
        Optional('classArgs'): {
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('start_step'): And(int, lambda x: 0 <= x <= 9999)
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999)
    }, {
        'builtinAssessorName': lambda x: x in ['Curvefitting'],
        Optional('classArgs'): {
            'epoch_num': And(int, lambda x: 0 <= x <= 9999),
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('start_step'): And(int, lambda x: 0 <= x <= 9999),
            Optional('threshold'): And(float, lambda x: 0.0 <= x <= 9999.0),
            Optional('gap'): And(int, lambda x: 1 <= x <= 9999)
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999)
    }, {
        'codeDir': os.path.exists,
        'classFileName': str,
        'className': str,
        Optional('classArgs'): dict,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }),
}

# 'trial' section used by the local and remote platform schemas:
# the command to launch, an existing code path, and a GPU count.
common_trial_schema = {
    'trial': {
        'command': str,
        'codeDir': os.path.exists,
        'gpuNum': And(int, lambda num: 0 <= num <= 99999),
    },
}

# 'trial' section for the 'pai' platform: the common trial fields plus
# per-trial resource requests, a container image and optional HDFS locations.
pai_trial_schema = {
    'trial': {
        'command': str,
        'codeDir': os.path.exists,
        'gpuNum': And(int, lambda x: 0 <= x <= 99999),
        'cpuNum': And(int, lambda x: 0 <= x <= 99999),
        'memoryMB': int,
        'image': str,
        Optional('shmMB'): int,
        # hdfs://<dotted IPv4>[:port][/path]. The dot between octets is
        # escaped so that it no longer matches an arbitrary character.
        Optional('dataDir'): Regex(r'hdfs://(([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
        Optional('outputDir'): Regex(r'hdfs://(([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
        Optional('virtualCluster'): str
    }
}

# 'paiConfig' section: three required string fields
# (userName, passWord, host).
pai_config_schema = {
    'paiConfig': {
        'userName': str,
        'passWord': str,
        'host': str,
    },
}

150
151
# 'trial' section for the 'kubeflow' platform. Each of the optional roles
# ('ps', 'master', 'worker') carries the same set of required fields.
kubeflow_trial_schema = {
    'trial': {
        'codeDir': os.path.exists,
        Optional('ps'): {
            'replicas': int,
            'command': str,
            'gpuNum': And(int, lambda x: 0 <= x <= 99999),
            'cpuNum': And(int, lambda x: 0 <= x <= 99999),
            'memoryMB': int,
            'image': str
        },
        Optional('master'): {
            'replicas': int,
            'command': str,
            'gpuNum': And(int, lambda x: 0 <= x <= 99999),
            'cpuNum': And(int, lambda x: 0 <= x <= 99999),
            'memoryMB': int,
            'image': str
        },
        Optional('worker'): {
            'replicas': int,
            'command': str,
            'gpuNum': And(int, lambda x: 0 <= x <= 99999),
            'cpuNum': And(int, lambda x: 0 <= x <= 99999),
            'memoryMB': int,
            'image': str
        }
    }
}

# 'kubeflowConfig' section. Two shapes are accepted: one that requires an
# 'nfs' block, and one that requires 'keyVault' plus 'azureStorage' blocks.
kubeflow_config_schema = {
    'kubeflowConfig': Or({
        'operator': Or('tf-operator', 'pytorch-operator'),
        'apiVersion': str,
        Optional('storage'): Or('nfs', 'azureStorage'),
        'nfs': {
            'server': str,
            'path': str
        }
    }, {
        'operator': Or('tf-operator', 'pytorch-operator'),
        'apiVersion': str,
        Optional('storage'): Or('nfs', 'azureStorage'),
        # The name patterns are anchored with ^...$ because the Regex
        # validator searches for a match anywhere in the value; without
        # anchors neither the character set nor the length bounds were
        # enforced on the whole string.
        'keyVault': {
            'vaultName': Regex(r'^[0-9a-zA-Z-]{1,127}$'),
            'name': Regex(r'^[0-9a-zA-Z-]{1,127}$')
        },
        'azureStorage': {
            'accountName': Regex(r'^[0-9a-zA-Z-]{3,31}$'),
            'azureShare': Regex(r'^[0-9a-zA-Z-]{3,63}$')
        }
    })
}

204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# 'trial' section for the 'frameworkcontroller' platform: an existing code
# path plus a list of task-role descriptions, each with its own completion
# policy, command, resource counts and image.
frameworkcontroller_trial_schema = {
    'trial': {
        'codeDir': os.path.exists,
        'taskRoles': [
            {
                'name': str,
                'taskNum': int,
                'frameworkAttemptCompletionPolicy': {
                    'minFailedTaskCount': int,
                    'minSucceededTaskCount': int,
                },
                'command': str,
                'gpuNum': And(int, lambda num: 0 <= num <= 99999),
                'cpuNum': And(int, lambda num: 0 <= num <= 99999),
                'memoryMB': int,
                'image': str,
            },
        ],
    },
}

# 'frameworkcontrollerConfig' section. Two shapes are accepted: one that
# requires an 'nfs' block, and one that requires 'keyVault' plus
# 'azureStorage' blocks.
frameworkcontroller_config_schema = {
    'frameworkcontrollerConfig': Or({
        Optional('storage'): Or('nfs', 'azureStorage'),
        Optional('serviceAccountName'): str,
        'nfs': {
            'server': str,
            'path': str
        }
    }, {
        Optional('storage'): Or('nfs', 'azureStorage'),
        Optional('serviceAccountName'): str,
        # The name patterns are anchored with ^...$ because the Regex
        # validator searches for a match anywhere in the value; without
        # anchors neither the character set nor the length bounds were
        # enforced on the whole string.
        'keyVault': {
            'vaultName': Regex(r'^[0-9a-zA-Z-]{1,127}$'),
            'name': Regex(r'^[0-9a-zA-Z-]{1,127}$')
        },
        'azureStorage': {
            'accountName': Regex(r'^[0-9a-zA-Z-]{3,31}$'),
            'azureShare': Regex(r'^[0-9a-zA-Z-]{3,63}$')
        }
    })
}


246
# Optional 'machineList' section: a list of machine entries, each
# authenticated either by password or by an SSH key file.
# NOTE: the misspelled name ("schima") is kept because other code in this
# file refers to it.
machine_list_schima = {
    Optional('machineList'): [Or({
        'ip': str,
        # Valid port numbers run from 1 through 65535 inclusive; the former
        # strict upper bound rejected 65535.
        Optional('port'): And(int, lambda x: 0 < x <= 65535),
        'username': str,
        'passwd': str
    }, {
        'ip': str,
        Optional('port'): And(int, lambda x: 0 < x <= 65535),
        'username': str,
        'sshKeyPath': os.path.exists,
        Optional('passphrase'): str
    })]
}
260
261
262
263
264

# Complete experiment-config validators, one per training-service platform.
# Each merges the shared settings with that platform's trial section and,
# where one is defined, its platform-specific section.
LOCAL_CONFIG_SCHEMA = Schema({
    **common_schema,
    **common_trial_schema,
})

REMOTE_CONFIG_SCHEMA = Schema({
    **common_schema,
    **common_trial_schema,
    **machine_list_schima,
})

PAI_CONFIG_SCHEMA = Schema({
    **common_schema,
    **pai_trial_schema,
    **pai_config_schema,
})

KUBEFLOW_CONFIG_SCHEMA = Schema({
    **common_schema,
    **kubeflow_trial_schema,
    **kubeflow_config_schema,
})

FRAMEWORKCONTROLLER_CONFIG_SCHEMA = Schema({
    **common_schema,
    **frameworkcontroller_trial_schema,
    **frameworkcontroller_config_schema,
})