kubeflowConfig.ts 6.22 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import { TrialConfig } from "../common/trialConfig";

/**
 * Copyright (c) Microsoft Corporation
 * All rights reserved.
 *
 * MIT License
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and
 * to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
 * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

'use strict';


/** operator types that kubeflow supported */
26
27
28
29
export type KubeflowOperator = 'tf-operator' | 'pytorch-operator' ;
export type KubeflowOperatorPlural = 'tfjobs' | 'pytorchjobs' ;
export type KubeflowOperatorJobKind = 'TFJob' | 'PyTorchJob';
export type KubeflowStorageKind = 'nfs' | 'azureStorage';
30
31
32
33
34
35

/**
 * map from Kubeflow operator name to its plural name in K8S
 */
export const kubeflowOperatorMap : Map<KubeflowOperator, KubeflowOperatorPlural> =  new Map<KubeflowOperator, KubeflowOperatorPlural>([
    ['tf-operator' , 'tfjobs'],
36
37
38
39
40
41
42
43
44
    ['pytorch-operator', 'pytorchjobs'] 
]);

/**
 * map from Kubeflow operator name to its job kind name in K8S
 */
export const kubeflowOperatorJobKindMap : Map<KubeflowOperator, KubeflowOperatorJobKind> =  new Map<KubeflowOperator, KubeflowOperatorJobKind>([
    ['tf-operator' , 'TFJob'],
    ['pytorch-operator', 'PyTorchJob']
45
46
47
48
49
50
]);

/**
 * Kuberflow cluster configuration
 * 
 */
51
export class KubeflowClusterConfigBase {
52
53
    /** Name of Kubeflow operator, like tf-operator */
    public readonly operator: KubeflowOperator;
54
    public readonly storage?: KubeflowStorageKind;
SparkSnail's avatar
SparkSnail committed
55
    
56
57
58
59
60
61
    /**
     * Constructor
     * @param userName User name of Kubeflow Cluster
     * @param passWord password of Kubeflow Cluster
     * @param host Host IP of Kubeflow Cluster
     */
62
    constructor(operator: KubeflowOperator, storage?: KubeflowStorageKind) {
63
        this.operator = operator;
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
        this.storage = storage;
    }
}

export class KubeflowClusterConfigNFS extends KubeflowClusterConfigBase{
    public readonly nfs: NFSConfig;
    
    constructor(operator: KubeflowOperator, nfs: NFSConfig, storage?: KubeflowStorageKind) {
        super(operator, storage)
        this.nfs = nfs;
    }
}

export class KubeflowClusterConfigAzure extends KubeflowClusterConfigBase{
    public readonly keyVault: keyVaultConfig;
    public readonly azureStorage: AzureStorage;
    
    constructor(operator: KubeflowOperator, keyVault: keyVaultConfig, azureStorage: AzureStorage, storage?: KubeflowStorageKind) {
        super(operator, storage)
SparkSnail's avatar
SparkSnail committed
83
84
        this.keyVault = keyVault;
        this.azureStorage = azureStorage;
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
    }
}

/**
 * NFS configuration to store Kubeflow job related files
 */
export class NFSConfig {
    /** IP Adress of NFS server */
    public readonly server : string;
    /** exported NFS path on NFS server */
    public readonly path : string;

    constructor(server : string, path : string) {
        this.server = server;
        this.path = path;
    }
}

SparkSnail's avatar
SparkSnail committed
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/**
 * KeyVault configuration to store the key of Azure Storage Service
 * Refer https://docs.microsoft.com/en-us/azure/key-vault/key-vault-manage-with-cli2
 */
export class keyVaultConfig {
    /**The vault-name to specify vault */
    public readonly vaultName : string;
    /**The name to specify private key */
    public readonly name : string;

    constructor(vaultName : string, name : string){
        this.vaultName = vaultName;
        this.name = name;
    }
}

/**
 * Azure Storage Service
 */
export class AzureStorage {
    /**The azure share to storage files */
    public readonly azureShare : string;
    
    /**The account name of sotrage service */
    public readonly accountName: string;
    constructor(azureShare : string, accountName: string){
        this.azureShare = azureShare;
        this.accountName = accountName;
    }
}

134
135
136
/**
 * Trial job configuration for Kubeflow
 */
137
138
139
140
141
export class KubeflowTrialConfigTemplate {
    /** replication number of current role */
    public readonly replicas: number;

    /** CPU number */
142
    public readonly cpuNum: number;
143
144

    /** Memory  */
145
    public readonly memoryMB: number;
146
147

    /** Docker image */
148
    public readonly image: string;
149
150
151
152
153
154

    /** Trail command */
    public readonly command : string;

    /** Required GPU number for trial job. The number should be in [0,100] */
    public readonly gpuNum : number;
155
    
156
157
158
159
160
    constructor(replicas: number, command : string, gpuNum : number, 
        cpuNum: number, memoryMB: number, image: string) {
        this.replicas = replicas;
        this.command = command;
        this.gpuNum = gpuNum;
161
162
163
164
        this.cpuNum = cpuNum;
        this.memoryMB = memoryMB;
        this.image = image;
    }
165
166
}

167
export class KubeflowTrialConfigBase {
168
    public readonly codeDir: string;
169
170
171
172
173
174
175

    constructor(codeDir: string) {
        this.codeDir = codeDir;
    }
}

export class KubeflowTrialConfigTensorflow extends KubeflowTrialConfigBase{
176
177
178
    public readonly ps?: KubeflowTrialConfigTemplate;
    public readonly worker: KubeflowTrialConfigTemplate;

179
180
    constructor(codeDir: string, worker: KubeflowTrialConfigTemplate,  ps?: KubeflowTrialConfigTemplate) {
        super(codeDir);
181
        this.ps = ps;
182
183
184
185
186
187
188
189
190
191
192
193
        this.worker = worker;
    }
}

export class KubeflowTrialConfigPytorch extends KubeflowTrialConfigBase{
    public readonly master?: KubeflowTrialConfigTemplate;
    public readonly worker: KubeflowTrialConfigTemplate;

    constructor(codeDir: string, worker: KubeflowTrialConfigTemplate,  master?: KubeflowTrialConfigTemplate) {
        super(codeDir);
        this.master = master;
        this.worker = worker;
194
    }
195
196
}