---
# training_service.yml
# Kubeflow training-service stanza.
# Every `null` below is an unset placeholder in this file; presumably the
# consumer fills these in before use — TODO confirm.
kubeflow:
  maxExecDuration: '15m'
  nniManagerIp: null
  kubeflowConfig:
    operator: 'tf-operator'
    apiVersion: 'v1alpha2'
    # This value matches the name of the sibling `azureStorage` mapping below.
    storage: 'azureStorage'
    keyVault:
      vaultName: null
      name: null
    azureStorage:
      accountName: null
      azureShare: null
  trial:
    worker:
      replicas: 1
      command: null
      gpuNum: 1
      cpuNum: 1
      memoryMB: 8192
      image: null
  trainingServicePlatform: 'kubeflow'

# Local training-service stanza: only the platform selector is set.
local:
  trainingServicePlatform: local
# PAI training-service stanza.
# Every `null` below is an unset placeholder in this file; presumably the
# consumer fills these in before use — TODO confirm.
pai:
  nniManagerIp: null
  maxExecDuration: 15m
  paiConfig:
    host: null
    # NOTE(review): credentials are left unset here; keep real values out of
    # version control and supply them at run time.
    passWord: null
    userName: null
  trainingServicePlatform: pai
  trial:
    gpuNum: 1
    cpuNum: 1
    dataDir: null
    image: null
    memoryMB: 8192
    outputDir: null
# Remote training-service stanza: a single machine entry whose connection
# fields are all left unset (null) in this file.
remote:
  machineList:
    - ip: null
      passwd: null
      port: null
      username: null
  trainingServicePlatform: 'remote'