# config_schema.py
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

import os
from schema import Schema, And, Use, Optional, Regex, Or

24
# Keys shared by every experiment configuration, independent of the training
# service. This dict is merged with a platform-specific trial/config section
# and wrapped in ``Schema`` at the bottom of the file.
#
# Plain types (``str``, ``bool``, ``int``) are type checks; callables such as
# ``os.path.exists`` and the lambdas are invoked with the configured value and
# must return a truthy result for the value to be accepted.
common_schema = {
    'authorName': str,
    'experimentName': str,
    Optional('description'): str,
    'trialConcurrency': And(int, lambda n: 1 <= n <= 999999),
    # Positive integer (no leading zero) followed by exactly one unit letter:
    # s, m, h or d. The unit is written as the character class ``[smhd]``;
    # inside ``[...]`` a ``|`` is a literal character, not alternation, so
    # listing it there would let values such as "10|" pass validation.
    Optional('maxExecDuration'): Regex(r'^[1-9][0-9]*[smhd]$'),
    Optional('maxTrialNum'): And(int, lambda x: 1 <= x <= 99999),
    'trainingServicePlatform': And(
        str,
        lambda x: x in ['remote', 'local', 'pai', 'kubeflow', 'frameworkcontroller'],
    ),
    # Must name an existing filesystem path.
    Optional('searchSpacePath'): os.path.exists,
    Optional('multiPhase'): bool,
    Optional('multiThread'): bool,
    Optional('nniManagerIp'): str,
    # Must name an existing directory.
    Optional('logDir'): os.path.isdir,
    Optional('debug'): bool,
    Optional('logLevel'): Or('trace', 'debug', 'info', 'warning', 'error', 'fatal'),
    'useAnnotation': bool,
    # Advisor: either the built-in Hyperband advisor or a user-supplied class.
    Optional('advisor'): Or({
        'builtinAdvisorName': Or('Hyperband'),
        'classArgs': {
            'optimize_mode': Or('maximize', 'minimize'),
            Optional('R'): int,
            Optional('eta'): int
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'codeDir': os.path.exists,
        'classFileName': str,
        'className': str,
        Optional('classArgs'): dict,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }),
    # Tuner: one alternative per family of built-in tuners (they differ in
    # which classArgs they accept), plus a final alternative for a
    # user-supplied tuner class.
    Optional('tuner'): Or({
        'builtinTunerName': Or('TPE', 'Random', 'Anneal', 'SMAC', 'Evolution'),
        Optional('classArgs'): {
            'optimize_mode': Or('maximize', 'minimize')
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        # These two tuners accept no classArgs at all.
        'builtinTunerName': Or('BatchTuner', 'GridSearch'),
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'builtinTunerName': 'NetworkMorphism',
        'classArgs': {
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('task'): And(str, lambda x: x in ['cv', 'nlp', 'common']),
            Optional('input_width'): int,
            Optional('input_channel'): int,
            Optional('n_output_node'): int,
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'builtinTunerName': 'MetisTuner',
        'classArgs': {
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('no_resampling'): bool,
            Optional('no_candidates'): bool,
            Optional('selection_num_starting_points'): int,
            Optional('cold_start_num'): int,
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }, {
        'codeDir': os.path.exists,
        'classFileName': str,
        'className': str,
        Optional('classArgs'): dict,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }),
    # Assessor: Medianstop, Curvefitting, or a user-supplied assessor class.
    Optional('assessor'): Or({
        'builtinAssessorName': lambda x: x in ['Medianstop'],
        Optional('classArgs'): {
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('start_step'): And(int, lambda x: 0 <= x <= 9999)
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999)
    }, {
        'builtinAssessorName': lambda x: x in ['Curvefitting'],
        Optional('classArgs'): {
            # epoch_num is the only mandatory key once classArgs is given.
            'epoch_num': And(int, lambda x: 0 <= x <= 9999),
            Optional('optimize_mode'): Or('maximize', 'minimize'),
            Optional('start_step'): And(int, lambda x: 0 <= x <= 9999),
            Optional('threshold'): And(float, lambda x: 0.0 <= x <= 9999.0),
            Optional('gap'): And(int, lambda x: 1 <= x <= 9999)
        },
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999)
    }, {
        'codeDir': os.path.exists,
        'classFileName': str,
        'className': str,
        Optional('classArgs'): dict,
        Optional('gpuNum'): And(int, lambda x: 0 <= x <= 99999),
    }),
}

# 'trial' section merged into LOCAL_CONFIG_SCHEMA and REMOTE_CONFIG_SCHEMA.
# All three keys are mandatory.
common_trial_schema = {
    'trial': {
        'command': str,
        # Callable validator: the value must name an existing path.
        'codeDir': os.path.exists,
        # Integer in the inclusive range 0..99999.
        'gpuNum': And(int, lambda count: 0 <= count <= 99999),
    },
}

# 'trial' section merged into PAI_CONFIG_SCHEMA.
pai_trial_schema = {
    'trial': {
        'command': str,
        # Must name an existing filesystem path.
        'codeDir': os.path.exists,
        'gpuNum': And(int, lambda x: 0 <= x <= 99999),
        'cpuNum': And(int, lambda x: 0 <= x <= 99999),
        'memoryMB': int,
        'image': str,
        Optional('shmMB'): int,
        # HDFS URI of the form hdfs://<dotted IPv4>[:port][/path]. The dot
        # between octets is escaped: a bare '.' matches any character and
        # would accept hosts such as "10a0b0c1".
        Optional('dataDir'): Regex(r'hdfs://(([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
        Optional('outputDir'): Regex(r'hdfs://(([0-9]{1,3}\.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
        Optional('virtualCluster'): str
    }
}

# 'paiConfig' section merged into PAI_CONFIG_SCHEMA; all three entries are
# mandatory strings.
pai_config_schema = {
    'paiConfig': {
        'userName': str,
        'passWord': str,
        'host': str,
    },
}

148
149
# 'trial' section merged into KUBEFLOW_CONFIG_SCHEMA. Each of the optional
# 'ps', 'master' and 'worker' entries, when present, must carry the same six
# mandatory keys.
kubeflow_trial_schema = {
    'trial': {
        # Must name an existing filesystem path.
        'codeDir': os.path.exists,
        Optional('ps'): {
            'replicas': int,
            'command': str,
            'gpuNum': And(int, lambda x: 0 <= x <= 99999),
            'cpuNum': And(int, lambda x: 0 <= x <= 99999),
            'memoryMB': int,
            'image': str
        },
        Optional('master'): {
            'replicas': int,
            'command': str,
            'gpuNum': And(int, lambda x: 0 <= x <= 99999),
            'cpuNum': And(int, lambda x: 0 <= x <= 99999),
            'memoryMB': int,
            'image': str
        },
        Optional('worker'): {
            'replicas': int,
            'command': str,
            'gpuNum': And(int, lambda x: 0 <= x <= 99999),
            'cpuNum': And(int, lambda x: 0 <= x <= 99999),
            'memoryMB': int,
            'image': str
        }
    }
}

# 'kubeflowConfig' section merged into KUBEFLOW_CONFIG_SCHEMA. Two
# alternatives are accepted: one that requires an 'nfs' block, and one that
# requires 'keyVault' together with 'azureStorage'.
kubeflow_config_schema = {
    'kubeflowConfig': Or({
        'operator': Or('tf-operator', 'pytorch-operator'),
        'apiVersion': str,
        Optional('storage'): Or('nfs', 'azureStorage'),
        'nfs': {
            'server': str,
            'path': str
        }
    }, {
        'operator': Or('tf-operator', 'pytorch-operator'),
        'apiVersion': str,
        Optional('storage'): Or('nfs', 'azureStorage'),
        # The name patterns allow ASCII letters, digits and '-'.
        'keyVault': {
            'vaultName': Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),
            'name': Regex('([0-9]|[a-z]|[A-Z]|-){1,127}')
        },
        'azureStorage': {
            'accountName': Regex('([0-9]|[a-z]|[A-Z]|-){3,31}'),
            'azureShare': Regex('([0-9]|[a-z]|[A-Z]|-){3,63}')
        }
    })
}

202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# 'trial' section merged into FRAMEWORKCONTROLLER_CONFIG_SCHEMA. 'taskRoles'
# is a list whose every element must match the single role template below.
frameworkcontroller_trial_schema = {
    'trial': {
        # Callable validator: the value must name an existing path.
        'codeDir': os.path.exists,
        'taskRoles': [
            {
                'name': str,
                'taskNum': int,
                'frameworkAttemptCompletionPolicy': {
                    'minFailedTaskCount': int,
                    'minSucceededTaskCount': int,
                },
                'command': str,
                # Both counts are integers in the inclusive range 0..99999.
                'gpuNum': And(int, lambda count: 0 <= count <= 99999),
                'cpuNum': And(int, lambda count: 0 <= count <= 99999),
                'memoryMB': int,
                'image': str,
            },
        ],
    },
}

# 'frameworkcontrollerConfig' section merged into
# FRAMEWORKCONTROLLER_CONFIG_SCHEMA. Two alternatives are accepted: one that
# requires an 'nfs' block, and one that requires 'keyVault' together with
# 'azureStorage'.
frameworkcontroller_config_schema = {
    'frameworkcontrollerConfig': Or({
        Optional('storage'): Or('nfs', 'azureStorage'),
        Optional('serviceAccountName'): str,
        'nfs': {
            'server': str,
            'path': str
        }
    }, {
        Optional('storage'): Or('nfs', 'azureStorage'),
        Optional('serviceAccountName'): str,
        # The name patterns allow ASCII letters, digits and '-'.
        'keyVault': {
            'vaultName': Regex('([0-9]|[a-z]|[A-Z]|-){1,127}'),
            'name': Regex('([0-9]|[a-z]|[A-Z]|-){1,127}')
        },
        'azureStorage': {
            'accountName': Regex('([0-9]|[a-z]|[A-Z]|-){3,31}'),
            'azureShare': Regex('([0-9]|[a-z]|[A-Z]|-){3,63}')
        }
    })
}


244
# 'machineList' section merged into REMOTE_CONFIG_SCHEMA. Every entry must
# match one of two shapes: username + 'passwd', or username + 'sshKeyPath'
# (with an optional 'passphrase').
#
# NOTE: the identifier is misspelled ("schima"), but it is referenced by that
# name when REMOTE_CONFIG_SCHEMA is built, so the spelling is kept.
machine_list_schima = {
    Optional('machineList'): [Or({
        'ip': str,
        # Valid ports are 1..65535 inclusive; the upper bound uses <= so
        # that port 65535 is not rejected.
        Optional('port'): And(int, lambda x: 0 < x <= 65535),
        'username': str,
        'passwd': str
    }, {
        'ip': str,
        Optional('port'): And(int, lambda x: 0 < x <= 65535),
        'username': str,
        # Must name an existing filesystem path.
        'sshKeyPath': os.path.exists,
        Optional('passphrase'): str
    })]
}
258
259
260
261
262

# Complete validators, one per constant below. Each merges the shared
# experiment keys with the matching trial section and, where one exists, the
# matching platform section. With ``{**a, **b}`` a key present in both dicts
# takes its value from the later one.
LOCAL_CONFIG_SCHEMA = Schema({
    **common_schema,
    **common_trial_schema,
})

REMOTE_CONFIG_SCHEMA = Schema({
    **common_schema,
    **common_trial_schema,
    **machine_list_schima,
})

PAI_CONFIG_SCHEMA = Schema({
    **common_schema,
    **pai_trial_schema,
    **pai_config_schema,
})

KUBEFLOW_CONFIG_SCHEMA = Schema({
    **common_schema,
    **kubeflow_trial_schema,
    **kubeflow_config_schema,
})

FRAMEWORKCONTROLLER_CONFIG_SCHEMA = Schema({
    **common_schema,
    **frameworkcontroller_trial_schema,
    **frameworkcontroller_config_schema,
})