config_schema.py 19.7 KB
Newer Older
liuzhe-lz's avatar
liuzhe-lz committed
1
2
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
3
4

import os
chicm-ms's avatar
chicm-ms committed
5
from schema import Schema, And, Optional, Regex, Or
6
7
8
from .constants import SCHEMA_TYPE_ERROR, SCHEMA_RANGE_ERROR, SCHEMA_PATH_ERROR


chicm-ms's avatar
chicm-ms committed
9
def setType(key, valueType):
10
    '''check key type'''
chicm-ms's avatar
chicm-ms committed
11
    return And(valueType, error=SCHEMA_TYPE_ERROR % (key, valueType.__name__))
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

def setChoice(key, *args):
    '''check choice'''
    return And(lambda n: n in args, error=SCHEMA_RANGE_ERROR % (key, str(args)))

def setNumberRange(key, keyType, start, end):
    '''check number range'''
    return And(
        And(keyType, error=SCHEMA_TYPE_ERROR % (key, keyType.__name__)),
        And(lambda n: start <= n <= end, error=SCHEMA_RANGE_ERROR % (key, '(%s,%s)' % (start, end))),
    )

def setPathCheck(key):
    '''check if path exist'''
    return And(os.path.exists, error=SCHEMA_PATH_ERROR % key)
27

28
common_schema = {
29
30
31
32
    'authorName': setType('authorName', str),
    'experimentName': setType('experimentName', str),
    Optional('description'): setType('description', str),
    'trialConcurrency': setNumberRange('trialConcurrency', int, 1, 99999),
chicm-ms's avatar
chicm-ms committed
33
    Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[s|m|h|d]$', error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')),
34
35
36
37
38
39
40
41
    Optional('maxTrialNum'): setNumberRange('maxTrialNum', int, 1, 99999),
    'trainingServicePlatform': setChoice('trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller'),
    Optional('searchSpacePath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'searchSpacePath'),
    Optional('multiPhase'): setType('multiPhase', bool),
    Optional('multiThread'): setType('multiThread', bool),
    Optional('nniManagerIp'): setType('nniManagerIp', str),
    Optional('logDir'): And(os.path.isdir, error=SCHEMA_PATH_ERROR % 'logDir'),
    Optional('debug'): setType('debug', bool),
42
    Optional('versionCheck'): setType('versionCheck', bool),
43
44
45
46
47
48
49
    Optional('logLevel'): setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'),
    Optional('logCollection'): setChoice('logCollection', 'http', 'none'),
    'useAnnotation': setType('useAnnotation', bool),
    Optional('tuner'): dict,
    Optional('advisor'): dict,
    Optional('assessor'): dict,
    Optional('localConfig'): {
50
51
52
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
        Optional('useActiveGpu'): setType('useActiveGpu', bool)
53
54
55
    }
}
tuner_schema_dict = {
56
57
    ('Anneal', 'SMAC'): {
        'builtinTunerName': setChoice('builtinTunerName', 'Anneal', 'SMAC'),
58
59
60
61
        Optional('classArgs'): {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
        },
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
62
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
QuanluZhang's avatar
QuanluZhang committed
63
    },
64
65
66
67
    ('Evolution'): {
        'builtinTunerName': setChoice('builtinTunerName', 'Evolution'),
        Optional('classArgs'): {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
Lee's avatar
Lee committed
68
            Optional('population_size'): setNumberRange('population_size', int, 0, 99999),
69
        },
70
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
71
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
72
    },
73
74
    ('BatchTuner', 'GridSearch', 'Random'): {
        'builtinTunerName': setChoice('builtinTunerName', 'BatchTuner', 'GridSearch', 'Random'),
75
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
76
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
Shufan Huang's avatar
Shufan Huang committed
77
    },
xuehui's avatar
xuehui committed
78
79
    'TPE': {
        'builtinTunerName': 'TPE',
80
        Optional('classArgs'): {
xuehui's avatar
xuehui committed
81
82
83
84
85
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('parallel_optimize'): setType('parallel_optimize', bool),
            Optional('constant_liar_type'): setChoice('constant_liar_type', 'min', 'max', 'mean')
        },
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
86
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
xuehui's avatar
xuehui committed
87
    },
88
89
    'NetworkMorphism': {
        'builtinTunerName': 'NetworkMorphism',
90
        Optional('classArgs'): {
91
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
chicm-ms's avatar
chicm-ms committed
92
            Optional('task'): setChoice('task', 'cv', 'nlp', 'common'),
93
94
95
96
            Optional('input_width'): setType('input_width', int),
            Optional('input_channel'): setType('input_channel', int),
            Optional('n_output_node'): setType('n_output_node', int),
            },
97
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
98
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
99
    },
100
101
    'MetisTuner': {
        'builtinTunerName': 'MetisTuner',
102
        Optional('classArgs'): {
103
104
105
106
107
108
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('no_resampling'): setType('no_resampling', bool),
            Optional('no_candidates'): setType('no_candidates', bool),
            Optional('selection_num_starting_points'):  setType('selection_num_starting_points', int),
            Optional('cold_start_num'): setType('cold_start_num', int),
            },
109
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
110
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
111
    },
Guoxin's avatar
Guoxin committed
112
113
    'GPTuner': {
        'builtinTunerName': 'GPTuner',
114
        Optional('classArgs'): {
Guoxin's avatar
Guoxin committed
115
116
117
118
119
120
121
122
123
124
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('utility'): setChoice('utility', 'ei', 'ucb', 'poi'),
            Optional('kappa'): setType('kappa', float),
            Optional('xi'): setType('xi', float),
            Optional('nu'): setType('nu', float),
            Optional('alpha'): setType('alpha', float),
            Optional('cold_start_num'): setType('cold_start_num', int),
            Optional('selection_num_warm_up'):  setType('selection_num_warm_up', int),
            Optional('selection_num_starting_points'):  setType('selection_num_starting_points', int),
            },
chicm-ms's avatar
chicm-ms committed
125
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
126
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
Guoxin's avatar
Guoxin committed
127
    },
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
    'PPOTuner': {
        'builtinTunerName': 'PPOTuner',
        'classArgs': {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('trials_per_update'): setNumberRange('trials_per_update', int, 0, 99999),
            Optional('epochs_per_update'): setNumberRange('epochs_per_update', int, 0, 99999),
            Optional('minibatch_size'): setNumberRange('minibatch_size', int, 0, 99999),
            Optional('ent_coef'): setType('ent_coef', float),
            Optional('lr'): setType('lr', float),
            Optional('vf_coef'): setType('vf_coef', float),
            Optional('max_grad_norm'): setType('max_grad_norm', float),
            Optional('gamma'): setType('gamma', float),
            Optional('lam'): setType('lam', float),
            Optional('cliprange'): setType('cliprange', float),
        },
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
144
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
145
    },
146
147
148
149
150
    'customized': {
        'codeDir': setPathCheck('codeDir'),
        'classFileName': setType('classFileName', str),
        'className': setType('className', str),
        Optional('classArgs'): dict,
151
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
152
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
153
154
155
156
157
158
159
160
161
162
    }
}

advisor_schema_dict = {
    'Hyperband':{
        'builtinAdvisorName': Or('Hyperband'),
        'classArgs': {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('R'): setType('R', int),
            Optional('eta'): setType('eta', int)
xuehui's avatar
xuehui committed
163
        },
164
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
165
    },
166
167
168
169
170
171
    'BOHB':{
        'builtinAdvisorName': Or('BOHB'),
        'classArgs': {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('min_budget'): setNumberRange('min_budget', int, 0, 9999),
            Optional('max_budget'): setNumberRange('max_budget', int, 0, 9999),
172
            Optional('eta'):setNumberRange('eta', int, 0, 9999),
173
174
175
176
177
178
179
            Optional('min_points_in_model'): setNumberRange('min_points_in_model', int, 0, 9999),
            Optional('top_n_percent'): setNumberRange('top_n_percent', int, 1, 99),
            Optional('num_samples'): setNumberRange('num_samples', int, 1, 9999),
            Optional('random_fraction'): setNumberRange('random_fraction', float, 0, 9999),
            Optional('bandwidth_factor'): setNumberRange('bandwidth_factor', float, 0, 9999),
            Optional('min_bandwidth'): setNumberRange('min_bandwidth', float, 0, 9999),
        },
180
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
181
    },
182
183
184
185
186
    'customized':{
        'codeDir': setPathCheck('codeDir'),
        'classFileName': setType('classFileName', str),
        'className': setType('className', str),
        Optional('classArgs'): dict,
187
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
188
    }
189
}
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214

assessor_schema_dict = {
    'Medianstop': {
        'builtinAssessorName': 'Medianstop',
        Optional('classArgs'): {
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('start_step'): setNumberRange('start_step', int, 0, 9999),
        },
    },
    'Curvefitting': {
        'builtinAssessorName': 'Curvefitting',
        Optional('classArgs'): {
            'epoch_num': setNumberRange('epoch_num', int, 0, 9999),
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('start_step'): setNumberRange('start_step', int, 0, 9999),
            Optional('threshold'): setNumberRange('threshold', float, 0, 9999),
            Optional('gap'): setNumberRange('gap', int, 1, 9999),
        },
    },
    'customized': {
        'codeDir': setPathCheck('codeDir'),
        'classFileName': setType('classFileName', str),
        'className': setType('className', str),
        Optional('classArgs'): dict,
    }
215
216
217
}

common_trial_schema = {
chicm-ms's avatar
chicm-ms committed
218
219
220
221
222
    'trial':{
        'command': setType('command', str),
        'codeDir': setPathCheck('codeDir'),
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
        Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode')
223
224
225
226
    }
}

pai_trial_schema = {
chicm-ms's avatar
chicm-ms committed
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
    'trial':{
        'command': setType('command', str),
        'codeDir': setPathCheck('codeDir'),
        'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
        'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
        'memoryMB': setType('memoryMB', int),
        'image': setType('image', str),
        Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'),
        Optional('shmMB'): setType('shmMB', int),
        Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
                            error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
        Optional('outputDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
                            error='ERROR: outputDir format error, outputDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
        Optional('virtualCluster'): setType('virtualCluster', str),
        Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode'),
        Optional('portList'): [{
            "label": setType('label', str),
            "beginAt": setType('beginAt', int),
            "portNumber": setType('portNumber', int)
        }]
247
248
249
250
    }
}

pai_config_schema = {
251
    'paiConfig': Or({
252
253
254
        'userName': setType('userName', str),
        'passWord': setType('passWord', str),
        'host': setType('host', str)
255
256
257
258
259
    }, {
        'userName': setType('userName', str),
        'token': setType('token', str),
        'host': setType('host', str)
    })
260
261
}

262
kubeflow_trial_schema = {
chicm-ms's avatar
chicm-ms committed
263
    'trial':{
264
        'codeDir':  setPathCheck('codeDir'),
265
        Optional('nasMode'): setChoice('nasMode', 'classic_mode', 'enas_mode', 'oneshot_mode', 'darts_mode'),
266
        Optional('ps'): {
267
268
269
270
271
            'replicas': setType('replicas', int),
            'command': setType('command', str),
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
272
273
            'image': setType('image', str),
            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
274
        },
275
        Optional('master'): {
276
277
278
279
280
            'replicas': setType('replicas', int),
            'command': setType('command', str),
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
281
282
            'image': setType('image', str),
            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
283
        },
284
        Optional('worker'):{
285
286
287
288
289
            'replicas': setType('replicas', int),
            'command': setType('command', str),
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
290
291
            'image': setType('image', str),
            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
292
        }
293
294
295
296
    }
}

kubeflow_config_schema = {
SparkSnail's avatar
SparkSnail committed
297
    'kubeflowConfig':Or({
298
299
300
        'operator': setChoice('operator', 'tf-operator', 'pytorch-operator'),
        'apiVersion': setType('apiVersion', str),
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
301
        'nfs': {
302
303
            'server': setType('server', str),
            'path': setType('path', str)
304
        }
chicm-ms's avatar
chicm-ms committed
305
    }, {
306
307
308
        'operator': setChoice('operator', 'tf-operator', 'pytorch-operator'),
        'apiVersion': setType('apiVersion', str),
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
SparkSnail's avatar
SparkSnail committed
309
        'keyVault': {
310
311
312
313
            'vaultName': And(Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),\
                         error='ERROR: vaultName format error, vaultName support using (0-9|a-z|A-Z|-)'),
            'name': And(Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),\
                    error='ERROR: name format error, name support using (0-9|a-z|A-Z|-)')
SparkSnail's avatar
SparkSnail committed
314
315
        },
        'azureStorage': {
316
317
318
319
            'accountName': And(Regex('([0-9]|[a-z]|[A-Z]|-){3,31}'),\
                           error='ERROR: accountName format error, accountName support using (0-9|a-z|A-Z|-)'),
            'azureShare': And(Regex('([0-9]|[a-z]|[A-Z]|-){3,63}'),\
                          error='ERROR: azureShare format error, azureShare support using (0-9|a-z|A-Z|-)')
320
321
        },
        Optional('uploadRetryCount'): setNumberRange('uploadRetryCount', int, 1, 99999)
SparkSnail's avatar
SparkSnail committed
322
    })
323
324
}

325
326
frameworkcontroller_trial_schema = {
    'trial':{
327
        'codeDir':  setPathCheck('codeDir'),
328
        'taskRoles': [{
329
330
            'name': setType('name', str),
            'taskNum': setType('taskNum', int),
331
            'frameworkAttemptCompletionPolicy': {
332
333
                'minFailedTaskCount': setType('minFailedTaskCount', int),
                'minSucceededTaskCount': setType('minSucceededTaskCount', int),
334
            },
335
336
337
338
            'command': setType('command', str),
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
339
340
            'image': setType('image', str),
            Optional('privateRegistryAuthPath'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'privateRegistryAuthPath')
341
342
343
344
345
346
        }]
    }
}

frameworkcontroller_config_schema = {
    'frameworkcontrollerConfig':Or({
347
348
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
        Optional('serviceAccountName'): setType('serviceAccountName', str),
349
        'nfs': {
350
351
            'server': setType('server', str),
            'path': setType('path', str)
352
        }
chicm-ms's avatar
chicm-ms committed
353
    }, {
354
355
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
        Optional('serviceAccountName'): setType('serviceAccountName', str),
356
        'keyVault': {
357
358
359
360
            'vaultName': And(Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),\
                         error='ERROR: vaultName format error, vaultName support using (0-9|a-z|A-Z|-)'),
            'name': And(Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),\
                    error='ERROR: name format error, name support using (0-9|a-z|A-Z|-)')
361
362
        },
        'azureStorage': {
363
364
365
366
            'accountName': And(Regex('([0-9]|[a-z]|[A-Z]|-){3,31}'),\
                           error='ERROR: accountName format error, accountName support using (0-9|a-z|A-Z|-)'),
            'azureShare': And(Regex('([0-9]|[a-z]|[A-Z]|-){3,63}'),\
                          error='ERROR: azureShare format error, azureShare support using (0-9|a-z|A-Z|-)')
367
368
        },
        Optional('uploadRetryCount'): setNumberRange('uploadRetryCount', int, 1, 99999)
369
370
371
    })
}

372
machine_list_schema = {
chicm-ms's avatar
chicm-ms committed
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
    Optional('machineList'):[Or({
        'ip': setType('ip', str),
        Optional('port'): setNumberRange('port', int, 1, 65535),
        'username': setType('username', str),
        'passwd': setType('passwd', str),
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
        Optional('useActiveGpu'): setType('useActiveGpu', bool)
        }, {
            'ip': setType('ip', str),
            Optional('port'): setNumberRange('port', int, 1, 65535),
            'username': setType('username', str),
            'sshKeyPath': setPathCheck('sshKeyPath'),
            Optional('passphrase'): setType('passphrase', str),
            Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
            Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
            Optional('useActiveGpu'): setType('useActiveGpu', bool)
        })]
391
}
392
393
394

LOCAL_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema})

395
REMOTE_CONFIG_SCHEMA = Schema({**common_schema, **common_trial_schema, **machine_list_schema})
396

397
398
399
PAI_CONFIG_SCHEMA = Schema({**common_schema, **pai_trial_schema, **pai_config_schema})

KUBEFLOW_CONFIG_SCHEMA = Schema({**common_schema, **kubeflow_trial_schema, **kubeflow_config_schema})
400
401

FRAMEWORKCONTROLLER_CONFIG_SCHEMA = Schema({**common_schema, **frameworkcontroller_trial_schema, **frameworkcontroller_config_schema})