# config_kubeflow.yml
# Experiment-level settings.
authorName: default
experimentName: example_mnist

trialConcurrency: 2
maxExecDuration: '1h'
maxTrialNum: 20

# Choices: local, remote, pai, kubeflow
trainingServicePlatform: kubeflow
searchSpacePath: search_space.json

# Choices: true, false
useAnnotation: false
# Tuner selection and its arguments.
tuner:
  # Built-in tuner choices: TPE, Random, Anneal, Evolution
  builtinTunerName: TPE
  classArgs:
    # Choices: maximize, minimize
    optimize_mode: maximize
# Assessor selection and its arguments.
assessor:
  builtinAssessorName: Medianstop
  # gpuNum belongs to the assessor itself, not to classArgs.
  gpuNum: 0
  classArgs:
    # Same direction as the tuner's optimize_mode in this file.
    optimize_mode: maximize
# Trial definition: code location plus one resource spec per role.
trial:
  # Trial code is taken from the current directory.
  codeDir: .

  # Worker role: two replicas, one GPU each.
  worker:
    replicas: 2
    command: python3 dist_mnist.py
    image: msranni/nni:latest
    gpuNum: 1
    cpuNum: 1
    # NOTE(review): 8196 may be intended as 8192 (8 GiB) - confirm.
    memoryMB: 8196

  # "ps" role (presumably the parameter server - confirm): one replica,
  # no GPU, same command and image as the workers.
  ps:
    replicas: 1
    command: python3 dist_mnist.py
    image: msranni/nni:latest
    gpuNum: 0
    cpuNum: 1
    # NOTE(review): 8196 may be intended as 8192 (8 GiB) - confirm.
    memoryMB: 8196
# Kubeflow-specific settings: operator, NFS storage, and cluster address.
kubeflowConfig:
  operator: tf-operator
  nfs:
    # Your NFS server IP, like 10.10.10.10
    # The placeholder is quoted on purpose: an unquoted {...} is parsed by
    # YAML as a flow mapping ({your_nfs_server_ip: null}), not as a string.
    server: '{your_nfs_server_ip}'
    # Your NFS server export path, like /var/nfs/nni
    # Quoted for the same reason as the server placeholder.
    path: '{your_nfs_server_export_path}'
  # NOTE(review): 10.10.10.10 looks like an example address - replace it
  # with the address of your own Kubernetes server.
  kubernetesServer: 10.10.10.10