config_schema.py 16.1 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
from schema import Schema, And, Use, Optional, Regex, Or
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from .constants import SCHEMA_TYPE_ERROR, SCHEMA_RANGE_ERROR, SCHEMA_PATH_ERROR


def setType(key, type):
    '''Build a validator that checks the value of `key` against `type`.

    key:  name of the config key, used only in the error message.
    type: the class handed to `And` as the validator; its `__name__`
          is shown in the error message. (The name shadows the builtin but
          is part of this function's signature.)

    Returns an `And` validator whose error is SCHEMA_TYPE_ERROR formatted
    with the key and the type name.
    '''
    message = SCHEMA_TYPE_ERROR % (key, type.__name__)
    return And(type, error=message)

def setChoice(key, *args):
    '''Build a validator that accepts only the values listed in `args`.

    key:  name of the config key, used only in the error message.
    args: the allowed values; membership is tested with `in`.

    Returns an `And` validator whose error is SCHEMA_RANGE_ERROR formatted
    with the key and the string form of the allowed-values tuple.
    '''
    message = SCHEMA_RANGE_ERROR % (key, str(args))
    return And(lambda candidate: candidate in args, error=message)

def setNumberRange(key, keyType, start, end):
    '''Build a validator for a number of type `keyType` within [start, end].

    key:        name of the config key, used only in the error messages.
    keyType:    numeric class the value is checked against first.
    start, end: bounds; both ends are inclusive (the error text renders
                them as "(start,end)").

    Returns an `And` of two validators: the type check (SCHEMA_TYPE_ERROR)
    followed by the range check (SCHEMA_RANGE_ERROR).
    '''
    type_check = And(keyType, error=SCHEMA_TYPE_ERROR % (key, keyType.__name__))
    bounds_text = '(%s,%s)' % (start, end)
    range_check = And(lambda n: start <= n <= end, error=SCHEMA_RANGE_ERROR % (key, bounds_text))
    return And(type_check, range_check)

def setPathCheck(key):
    '''Build a validator that requires the value of `key` to be an existing path.

    key: name of the config key, used only in the error message.

    Returns an `And` validator that fails with SCHEMA_PATH_ERROR formatted
    with the key when the value is not a string or the path does not exist.
    '''
    # os.path.exists() interprets an integer (and therefore a bool) as an
    # open file descriptor, so values such as 1 or True would be reported as
    # "existing". Requiring a string first rejects those non-path values.
    return And(str, os.path.exists, error=SCHEMA_PATH_ERROR % key)
44

45
# Validation rules for the top-level experiment keys. This dict is merged
# into every *_CONFIG_SCHEMA built at the bottom of this module.
common_schema = {
    'authorName': setType('authorName', str),
    'experimentName': setType('experimentName', str),
    Optional('description'): setType('description', str),
    'trialConcurrency': setNumberRange('trialConcurrency', int, 1, 99999),
    # A positive integer followed by exactly one unit letter. The character
    # class must be [smhd]: writing it as [s|m|h|d] also admits a literal '|'
    # as the unit, so a value like "10|" would pass validation.
    Optional('maxExecDuration'): And(Regex(r'^[1-9][0-9]*[smhd]$', error='ERROR: maxExecDuration format is [digit]{s,m,h,d}')),
    Optional('maxTrialNum'): setNumberRange('maxTrialNum', int, 1, 99999),
    'trainingServicePlatform': setChoice('trainingServicePlatform', 'remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller'),
    # Same existence check as every other path key in this module.
    Optional('searchSpacePath'): setPathCheck('searchSpacePath'),
    Optional('multiPhase'): setType('multiPhase', bool),
    Optional('multiThread'): setType('multiThread', bool),
    Optional('nniManagerIp'): setType('nniManagerIp', str),
    # logDir must be an existing directory, not merely an existing path.
    Optional('logDir'): And(os.path.isdir, error=SCHEMA_PATH_ERROR % 'logDir'),
    Optional('debug'): setType('debug', bool),
    Optional('logLevel'): setChoice('logLevel', 'trace', 'debug', 'info', 'warning', 'error', 'fatal'),
    Optional('logCollection'): setChoice('logCollection', 'http', 'none'),
    'useAnnotation': setType('useAnnotation', bool),
    # tuner / advisor / assessor are only checked to be dicts here; the
    # per-algorithm rules live in the *_schema_dict tables of this module.
    Optional('tuner'): dict,
    Optional('advisor'): dict,
    Optional('assessor'): dict,
    Optional('localConfig'): {
        # Either a single int, or a comma-separated string in which every
        # item parses with int() (a failing int() makes the validator fail).
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
        Optional('useActiveGpu'): setType('useActiveGpu', bool)
    }
}
# Per-tuner validation rules. A key is either one tuner name or a tuple of
# names that share a rule set.
# NOTE(review): presumably the caller picks an entry by the configured
# builtinTunerName (or 'customized') -- confirm against the validator code.
tuner_schema_dict = {
    # Tuners whose only class argument is the optimisation direction.
    ('TPE', 'Anneal', 'SMAC', 'Evolution'): {
        'builtinTunerName': setChoice('builtinTunerName', 'TPE', 'Anneal', 'SMAC', 'Evolution'),
        Optional('classArgs'): {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
        },
        Optional('includeIntermediateResults'): setType('includeIntermediateResults', bool),
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    # Tuners that take no class arguments at all.
    ('BatchTuner', 'GridSearch', 'Random'): {
        'builtinTunerName': setChoice('builtinTunerName', 'BatchTuner', 'GridSearch', 'Random'),
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    'NetworkMorphism': {
        'builtinTunerName': 'NetworkMorphism',
        # classArgs itself is required; each argument inside is optional.
        'classArgs': {
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('task'): setChoice('task', 'cv', 'nlp', 'common'),
            Optional('input_width'): setType('input_width', int),
            Optional('input_channel'): setType('input_channel', int),
            Optional('n_output_node'): setType('n_output_node', int),
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    'MetisTuner': {
        'builtinTunerName': 'MetisTuner',
        'classArgs': {
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('no_resampling'): setType('no_resampling', bool),
            Optional('no_candidates'): setType('no_candidates', bool),
            Optional('selection_num_starting_points'): setType('selection_num_starting_points', int),
            Optional('cold_start_num'): setType('cold_start_num', int),
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    'GPTuner': {
        'builtinTunerName': 'GPTuner',
        'classArgs': {
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('utility'): setChoice('utility', 'ei', 'ucb', 'poi'),
            Optional('kappa'): setType('kappa', float),
            Optional('xi'): setType('xi', float),
            Optional('nu'): setType('nu', float),
            Optional('alpha'): setType('alpha', float),
            Optional('cold_start_num'): setType('cold_start_num', int),
            Optional('selection_num_warm_up'): setType('selection_num_warm_up', int),
            Optional('selection_num_starting_points'): setType('selection_num_starting_points', int),
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    # User-supplied tuner: located by code directory, file name and class name.
    'customized': {
        'codeDir': setPathCheck('codeDir'),
        'classFileName': setType('classFileName', str),
        'className': setType('className', str),
        Optional('classArgs'): dict,
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    }
}

# Per-advisor validation rules, keyed by advisor name.
advisor_schema_dict = {
    'Hyperband': {
        'builtinAdvisorName': Or('Hyperband'),
        'classArgs': {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('R'): setType('R', int),
            Optional('eta'): setType('eta', int)
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    'BOHB': {
        'builtinAdvisorName': Or('BOHB'),
        'classArgs': {
            'optimize_mode': setChoice('optimize_mode', 'maximize', 'minimize'),
            # Integer-valued arguments.
            Optional('min_budget'): setNumberRange('min_budget', int, 0, 9999),
            Optional('max_budget'): setNumberRange('max_budget', int, 0, 9999),
            Optional('eta'): setNumberRange('eta', int, 0, 9999),
            Optional('min_points_in_model'): setNumberRange('min_points_in_model', int, 0, 9999),
            Optional('top_n_percent'): setNumberRange('top_n_percent', int, 1, 99),
            Optional('num_samples'): setNumberRange('num_samples', int, 1, 9999),
            # Float-valued arguments.
            Optional('random_fraction'): setNumberRange('random_fraction', float, 0, 9999),
            Optional('bandwidth_factor'): setNumberRange('bandwidth_factor', float, 0, 9999),
            Optional('min_bandwidth'): setNumberRange('min_bandwidth', float, 0, 9999),
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    # User-supplied advisor: located by code directory, file name and class name.
    'customized': {
        'codeDir': setPathCheck('codeDir'),
        'classFileName': setType('classFileName', str),
        'className': setType('className', str),
        Optional('classArgs'): dict,
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    }
}
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191

# Per-assessor validation rules, keyed by assessor name.
assessor_schema_dict = {
    'Medianstop': {
        'builtinAssessorName': 'Medianstop',
        Optional('classArgs'): {
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('start_step'): setNumberRange('start_step', int, 0, 9999),
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    'Curvefitting': {
        'builtinAssessorName': 'Curvefitting',
        Optional('classArgs'): {
            # epoch_num is the only mandatory argument once classArgs is given.
            'epoch_num': setNumberRange('epoch_num', int, 0, 9999),
            Optional('optimize_mode'): setChoice('optimize_mode', 'maximize', 'minimize'),
            Optional('start_step'): setNumberRange('start_step', int, 0, 9999),
            Optional('threshold'): setNumberRange('threshold', float, 0, 9999),
            Optional('gap'): setNumberRange('gap', int, 1, 9999),
        },
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999),
    },

    # User-supplied assessor: located by code directory, file name and class name.
    'customized': {
        'codeDir': setPathCheck('codeDir'),
        'classFileName': setType('classFileName', str),
        'className': setType('className', str),
        Optional('classArgs'): dict,
        Optional('gpuNum'): setNumberRange('gpuNum', int, 0, 99999)
    }
}

# 'trial' section used by the local and remote configuration schemas.
common_trial_schema = {
    'trial': {
        'command': setType('command', str),
        'codeDir': setPathCheck('codeDir'),
        'gpuNum': setNumberRange('gpuNum', int, 0, 99999)
    }
}

# 'trial' section used by the PAI configuration schema.
pai_trial_schema = {
    'trial': {
        'command': setType('command', str),
        'codeDir': setPathCheck('codeDir'),
        'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
        'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
        'memoryMB': setType('memoryMB', int),
        'image': setType('image', str),
        Optional('shmMB'): setType('shmMB', int),
        # hdfs://<ipv4>[:port][/path]. schema's Regex validates with
        # re.search, so the pattern is anchored at both ends; the dots
        # between octets are escaped so they no longer match any character.
        Optional('dataDir'): And(Regex(r'^hdfs://(([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?$'),
                                 error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
        Optional('outputDir'): And(Regex(r'^hdfs://(([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?$'),
                                   error='ERROR: outputDir format error, outputDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
        Optional('virtualCluster'): setType('virtualCluster', str),
    }
}

# 'paiConfig' section: three required string fields.
pai_config_schema = {
    'paiConfig': {
        'userName': setType('userName', str),
        'passWord': setType('passWord', str),
        'host': setType('host', str)
    }
}

227
228
def _kubeflow_role_schema():
    '''Return a fresh schema dict for one kubeflow role (ps, master or worker).

    A new dict is built on every call so the three roles do not share one
    object.
    '''
    return {
        'replicas': setType('replicas', int),
        'command': setType('command', str),
        'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
        'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
        'memoryMB': setType('memoryMB', int),
        'image': setType('image', str)
    }


# 'trial' section used by the kubeflow configuration schema. Every role is
# optional, and all roles are validated with the same set of fields.
kubeflow_trial_schema = {
    'trial': {
        'codeDir': setPathCheck('codeDir'),
        Optional('ps'): _kubeflow_role_schema(),
        Optional('master'): _kubeflow_role_schema(),
        Optional('worker'): _kubeflow_role_schema()
    }
}

# 'kubeflowConfig' section. Exactly one of two shapes must validate:
# an NFS-backed configuration, or an Azure-storage-backed configuration.
#
# The name patterns are anchored at both ends: schema's Regex validates with
# re.search, so without anchors any string containing a single allowed
# character would pass and the length limits would never be enforced.
kubeflow_config_schema = {
    'kubeflowConfig': Or({
        # Shape 1: NFS storage.
        'operator': setChoice('operator', 'tf-operator', 'pytorch-operator'),
        'apiVersion': setType('apiVersion', str),
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
        'nfs': {
            'server': setType('server', str),
            'path': setType('path', str)
        }
    }, {
        # Shape 2: Azure storage, with credentials taken from a key vault.
        'operator': setChoice('operator', 'tf-operator', 'pytorch-operator'),
        'apiVersion': setType('apiVersion', str),
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
        'keyVault': {
            'vaultName': And(Regex(r'^[0-9a-zA-Z-]{1,127}$'),
                             error='ERROR: vaultName format error, vaultName support using (0-9|a-z|A-Z|-)'),
            'name': And(Regex(r'^[0-9a-zA-Z-]{1,127}$'),
                        error='ERROR: name format error, name support using (0-9|a-z|A-Z|-)')
        },
        'azureStorage': {
            'accountName': And(Regex(r'^[0-9a-zA-Z-]{3,31}$'),
                               error='ERROR: accountName format error, accountName support using (0-9|a-z|A-Z|-)'),
            'azureShare': And(Regex(r'^[0-9a-zA-Z-]{3,63}$'),
                              error='ERROR: azureShare format error, azureShare support using (0-9|a-z|A-Z|-)')
        }
    })
}

285
286
# 'trial' section used by the frameworkcontroller configuration schema.
frameworkcontroller_trial_schema = {
    'trial': {
        'codeDir': setPathCheck('codeDir'),
        # A list in which every element must match this task-role layout.
        'taskRoles': [{
            'name': setType('name', str),
            'taskNum': setType('taskNum', int),
            'frameworkAttemptCompletionPolicy': {
                'minFailedTaskCount': setType('minFailedTaskCount', int),
                'minSucceededTaskCount': setType('minSucceededTaskCount', int),
            },
            'command': setType('command', str),
            'gpuNum': setNumberRange('gpuNum', int, 0, 99999),
            'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
            'memoryMB': setType('memoryMB', int),
            'image': setType('image', str)
        }]
    }
}

# 'frameworkcontrollerConfig' section. Exactly one of two shapes must
# validate: an NFS-backed configuration, or an Azure-storage-backed one.
#
# The name patterns are anchored at both ends: schema's Regex validates with
# re.search, so without anchors any string containing a single allowed
# character would pass and the length limits would never be enforced.
frameworkcontroller_config_schema = {
    'frameworkcontrollerConfig': Or({
        # Shape 1: NFS storage.
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
        Optional('serviceAccountName'): setType('serviceAccountName', str),
        'nfs': {
            'server': setType('server', str),
            'path': setType('path', str)
        }
    }, {
        # Shape 2: Azure storage, with credentials taken from a key vault.
        Optional('storage'): setChoice('storage', 'nfs', 'azureStorage'),
        Optional('serviceAccountName'): setType('serviceAccountName', str),
        'keyVault': {
            'vaultName': And(Regex(r'^[0-9a-zA-Z-]{1,127}$'),
                             error='ERROR: vaultName format error, vaultName support using (0-9|a-z|A-Z|-)'),
            'name': And(Regex(r'^[0-9a-zA-Z-]{1,127}$'),
                        error='ERROR: name format error, name support using (0-9|a-z|A-Z|-)')
        },
        'azureStorage': {
            'accountName': And(Regex(r'^[0-9a-zA-Z-]{3,31}$'),
                               error='ERROR: accountName format error, accountName support using (0-9|a-z|A-Z|-)'),
            'azureShare': And(Regex(r'^[0-9a-zA-Z-]{3,63}$'),
                              error='ERROR: azureShare format error, azureShare support using (0-9|a-z|A-Z|-)')
        }
    })
}

330
# Optional 'machineList' section: a list whose entries each match one of two
# login styles -- password based, or SSH-key based.
machine_list_schema = {
    Optional('machineList'): [Or({
        # Login with user name and password.
        'ip': setType('ip', str),
        Optional('port'): setNumberRange('port', int, 1, 65535),
        'username': setType('username', str),
        'passwd': setType('passwd', str),
        # Either a single int, or a comma-separated string of ints.
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
        Optional('useActiveGpu'): setType('useActiveGpu', bool)
    }, {
        # Login with user name and an SSH key file (optionally passphrase protected).
        'ip': setType('ip', str),
        Optional('port'): setNumberRange('port', int, 1, 65535),
        'username': setType('username', str),
        'sshKeyPath': setPathCheck('sshKeyPath'),
        Optional('passphrase'): setType('passphrase', str),
        # Either a single int, or a comma-separated string of ints.
        Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
        Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
        Optional('useActiveGpu'): setType('useActiveGpu', bool)
    })]
}
350
351
352

def _merge_schema_parts(*parts):
    '''Combine several schema dicts into one new dict.

    Parts are applied left to right, so a key present in a later part
    replaces the same key from an earlier one.
    '''
    merged = {}
    for part in parts:
        merged.update(part)
    return merged


# One complete schema per training-service platform: the common keys plus
# that platform's trial section and, where it has one, its config section.
LOCAL_CONFIG_SCHEMA = Schema(_merge_schema_parts(common_schema, common_trial_schema))

REMOTE_CONFIG_SCHEMA = Schema(_merge_schema_parts(common_schema, common_trial_schema, machine_list_schema))

PAI_CONFIG_SCHEMA = Schema(_merge_schema_parts(common_schema, pai_trial_schema, pai_config_schema))

KUBEFLOW_CONFIG_SCHEMA = Schema(_merge_schema_parts(common_schema, kubeflow_trial_schema, kubeflow_config_schema))

FRAMEWORKCONTROLLER_CONFIG_SCHEMA = Schema(
    _merge_schema_parts(common_schema, frameworkcontroller_trial_schema, frameworkcontroller_config_schema)
)