Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
88ef6c04
Unverified
Commit
88ef6c04
authored
Aug 01, 2019
by
SparkSnail
Committed by
GitHub
Aug 01, 2019
Browse files
Merge pull request #197 from microsoft/master
merge master
parents
5f3c5ffd
555334de
Changes
48
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
951 additions
and
61 deletions
+951
-61
docs/img/parallel_tpe_search_result.PNG
docs/img/parallel_tpe_search_result.PNG
+0
-0
docs/img/parallel_tpe_search_tpe.PNG
docs/img/parallel_tpe_search_tpe.PNG
+0
-0
examples/notebooks/retrieve_nni_info_with_python.ipynb
examples/notebooks/retrieve_nni_info_with_python.ipynb
+497
-0
examples/trials/auto-feature-engineering/README.md
examples/trials/auto-feature-engineering/README.md
+8
-0
examples/tuners/random_nas_tuner/random_nas_tuner.py
examples/tuners/random_nas_tuner/random_nas_tuner.py
+3
-1
setup.py
setup.py
+2
-1
src/nni_manager/rest_server/restValidationSchemas.ts
src/nni_manager/rest_server/restValidationSchemas.ts
+5
-0
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts
...bernetes/frameworkcontroller/frameworkcontrollerConfig.ts
+2
-2
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
...frameworkcontroller/frameworkcontrollerTrainingService.ts
+25
-14
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts
...er/training_service/kubernetes/kubeflow/kubeflowConfig.ts
+2
-2
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts
...ng_service/kubernetes/kubeflow/kubeflowTrainingService.ts
+45
-32
src/nni_manager/training_service/kubernetes/kubernetesConfig.ts
...i_manager/training_service/kubernetes/kubernetesConfig.ts
+5
-1
src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
.../training_service/kubernetes/kubernetesTrainingService.ts
+31
-0
src/nni_manager/training_service/pai/paiConfig.ts
src/nni_manager/training_service/pai/paiConfig.ts
+8
-2
src/nni_manager/training_service/pai/paiTrainingService.ts
src/nni_manager/training_service/pai/paiTrainingService.ts
+4
-2
src/sdk/pycli/nnicli/__init__.py
src/sdk/pycli/nnicli/__init__.py
+21
-0
src/sdk/pycli/nnicli/nni_client.py
src/sdk/pycli/nnicli/nni_client.py
+156
-0
src/sdk/pycli/setup.py
src/sdk/pycli/setup.py
+18
-0
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
+63
-4
test/cli_test.py
test/cli_test.py
+56
-0
No files found.
docs/img/parallel_tpe_search_result.PNG
0 → 100644
View file @
88ef6c04
363 KB
docs/img/parallel_tpe_search_tpe.PNG
0 → 100644
View file @
88ef6c04
9.5 KB
examples/notebooks/retrieve_nni_info_with_python.ipynb
0 → 100644
View file @
88ef6c04
This diff is collapsed.
Click to expand it.
examples/trials/auto-feature-engineering/README.md
0 → 100644
View file @
88ef6c04
**Automatic Feature Engineering in nni**
===
Now we have an
[
example
](
https://github.com/SpongebBob/tabular_automl_NNI
)
, which can automatically do feature engineering in nni.
This code comes from our contributors. Thanks to our lovely contributors!
And welcome more and more people to join us!
examples/tuners/random_nas_tuner/random_nas_tuner.py
View file @
88ef6c04
...
...
@@ -7,7 +7,9 @@ def random_archi_generator(nas_ss, random_state):
'''
chosen_archi
=
{}
print
(
"zql: nas search space: "
,
nas_ss
)
for
block_name
,
block
in
nas_ss
.
items
():
for
block_name
,
block_value
in
nas_ss
.
items
():
assert
block_value
[
'_type'
]
==
"mutable_layer"
,
"Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
block
=
block_value
[
'_value'
]
tmp_block
=
{}
for
layer_name
,
layer
in
block
.
items
():
tmp_layer
=
{}
...
...
setup.py
View file @
88ef6c04
...
...
@@ -35,9 +35,10 @@ setup(
license
=
'MIT'
,
url
=
'https://github.com/Microsoft/nni'
,
packages
=
find_packages
(
'src/sdk/pynni'
,
exclude
=
[
'tests'
])
+
find_packages
(
'tools'
),
packages
=
find_packages
(
'src/sdk/pynni'
,
exclude
=
[
'tests'
])
+
find_packages
(
'src/sdk/pycli'
)
+
find_packages
(
'tools'
),
package_dir
=
{
'nni'
:
'src/sdk/pynni/nni'
,
'nnicli'
:
'src/sdk/pycli/nnicli'
,
'nni_annotation'
:
'tools/nni_annotation'
,
'nni_cmd'
:
'tools/nni_cmd'
,
'nni_trial_tool'
:
'tools/nni_trial_tool'
,
...
...
src/nni_manager/rest_server/restValidationSchemas.ts
View file @
88ef6c04
...
...
@@ -51,10 +51,12 @@ export namespace ValidationSchemas {
command
:
joi
.
string
().
min
(
1
),
virtualCluster
:
joi
.
string
(),
shmMB
:
joi
.
number
(),
authFile
:
joi
.
string
(),
nasMode
:
joi
.
string
().
valid
(
'
classic_mode
'
,
'
enas_mode
'
,
'
oneshot_mode
'
),
worker
:
joi
.
object
({
replicas
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
...
@@ -64,6 +66,7 @@ export namespace ValidationSchemas {
ps
:
joi
.
object
({
replicas
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
...
@@ -73,6 +76,7 @@ export namespace ValidationSchemas {
master
:
joi
.
object
({
replicas
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
...
@@ -83,6 +87,7 @@ export namespace ValidationSchemas {
name
:
joi
.
string
().
min
(
1
),
taskNum
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts
View file @
88ef6c04
...
...
@@ -43,8 +43,8 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
public
readonly
taskNum
:
number
;
constructor
(
taskNum
:
number
,
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
frameworkAttemptCompletionPolicy
:
FrameworkAttemptCompletionPolicy
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
);
frameworkAttemptCompletionPolicy
:
FrameworkAttemptCompletionPolicy
,
privateRegistryFilePath
?:
string
|
undefined
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
,
privateRegistryFilePath
);
this
.
frameworkAttemptCompletionPolicy
=
frameworkAttemptCompletionPolicy
;
this
.
name
=
name
;
this
.
taskNum
=
taskNum
;
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
View file @
88ef6c04
...
...
@@ -305,7 +305,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
// Generate frameworkcontroller job resource config object
const
frameworkcontrollerJobConfig
:
any
=
this
.
generateFrameworkControllerJobConfig
(
trialJobId
,
trialWorkingFolder
,
frameworkcontrollerJobName
,
podResources
);
await
this
.
generateFrameworkControllerJobConfig
(
trialJobId
,
trialWorkingFolder
,
frameworkcontrollerJobName
,
podResources
);
return
Promise
.
resolve
(
frameworkcontrollerJobConfig
);
}
...
...
@@ -329,8 +329,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
* @param frameworkcontrollerJobName job name
* @param podResources pod template
*/
private
generateFrameworkControllerJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
frameworkcontrollerJobName
:
string
,
podResources
:
any
)
:
any
{
private
async
generateFrameworkControllerJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
frameworkcontrollerJobName
:
string
,
podResources
:
any
)
:
Promise
<
any
>
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
}
...
...
@@ -345,12 +345,14 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
if
(
containerPort
===
undefined
)
{
throw
new
Error
(
'
Container port is not initialized
'
);
}
const
taskRole
:
any
=
this
.
generateTaskRoleConfig
(
trialWorkingFolder
,
this
.
fcTrialConfig
.
taskRoles
[
index
].
image
,
`run_
${
this
.
fcTrialConfig
.
taskRoles
[
index
].
name
}
.sh`
,
podResources
[
index
],
containerPort
containerPort
,
await
this
.
createRegistrySecret
(
this
.
fcTrialConfig
.
taskRoles
[
index
].
privateRegistryAuthPath
)
);
taskRoles
.
push
({
name
:
this
.
fcTrialConfig
.
taskRoles
[
index
].
name
,
...
...
@@ -363,7 +365,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
});
}
return
{
return
Promise
.
resolve
(
{
apiVersion
:
`frameworkcontroller.microsoft.com/v1`
,
kind
:
'
Framework
'
,
metadata
:
{
...
...
@@ -379,11 +381,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
executionType
:
'
Start
'
,
taskRoles
:
taskRoles
}
};
}
)
;
}
private
generateTaskRoleConfig
(
trialWorkingFolder
:
string
,
replicaImage
:
string
,
runScriptFile
:
string
,
podResources
:
any
,
containerPort
:
number
):
any
{
podResources
:
any
,
containerPort
:
number
,
privateRegistrySecretName
:
string
|
undefined
):
any
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
}
...
...
@@ -451,13 +453,22 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
mountPath
:
'
/mnt/frameworkbarrier
'
}]
}];
const
spec
:
any
=
{
let
spec
:
any
=
{
containers
:
containers
,
initContainers
:
initContainers
,
restartPolicy
:
'
OnFailure
'
,
volumes
:
volumeSpecMap
.
get
(
'
nniVolumes
'
),
hostNetwork
:
false
};
if
(
privateRegistrySecretName
)
{
spec
.
imagePullSecrets
=
[
{
name
:
privateRegistrySecretName
}
]
}
if
(
this
.
fcClusterConfig
.
serviceAccountName
!==
undefined
)
{
spec
.
serviceAccountName
=
this
.
fcClusterConfig
.
serviceAccountName
;
}
...
...
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts
View file @
88ef6c04
...
...
@@ -135,8 +135,8 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export
class
KubeflowTrialConfigTemplate
extends
KubernetesTrialConfigTemplate
{
public
readonly
replicas
:
number
;
constructor
(
replicas
:
number
,
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
);
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
privateRegistryAuthPath
?:
string
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
,
privateRegistryAuthPath
);
this
.
replicas
=
replicas
;
}
}
...
...
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts
View file @
88ef6c04
...
...
@@ -347,7 +347,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
// Generate kubeflow job resource config object
const
kubeflowJobConfig
:
any
=
this
.
generateKubeflowJobConfig
(
trialJobId
,
trialWorkingFolder
,
kubeflowJobName
,
workerPodResources
,
const
kubeflowJobConfig
:
any
=
await
this
.
generateKubeflowJobConfig
(
trialJobId
,
trialWorkingFolder
,
kubeflowJobName
,
workerPodResources
,
nonWorkerResources
);
return
Promise
.
resolve
(
kubeflowJobConfig
);
...
...
@@ -361,8 +361,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param workerPodResources worker pod template
* @param nonWorkerPodResources non-worker pod template, like ps or master
*/
private
generateKubeflowJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
kubeflowJobName
:
string
,
workerPodResources
:
any
,
nonWorkerPodResources
?:
any
)
:
any
{
private
async
generateKubeflowJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
kubeflowJobName
:
string
,
workerPodResources
:
any
,
nonWorkerPodResources
?:
any
)
:
Promise
<
any
>
{
if
(
this
.
kubeflowClusterConfig
===
undefined
)
{
throw
new
Error
(
'
Kubeflow Cluster config is not initialized
'
);
}
...
...
@@ -377,29 +377,32 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
const
replicaSpecsObj
:
any
=
{};
const
replicaSpecsObjMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
if
(
this
.
kubeflowTrialConfig
.
operatorType
===
'
tf-operator
'
)
{
const
tensorflowTrialConfig
:
KubeflowTrialConfigTensorflow
=
<
KubeflowTrialConfigTensorflow
>
this
.
kubeflowTrialConfig
;
let
privateRegistrySecretName
=
await
this
.
createRegistrySecret
(
tensorflowTrialConfig
.
worker
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Worker
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
tensorflowTrialConfig
.
worker
.
replicas
,
tensorflowTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
);
tensorflowTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
,
privateRegistrySecretName
);
if
(
tensorflowTrialConfig
.
ps
!==
undefined
)
{
let
privateRegistrySecretName
:
string
|
undefined
=
await
this
.
createRegistrySecret
(
tensorflowTrialConfig
.
ps
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Ps
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
tensorflowTrialConfig
.
ps
.
replicas
,
tensorflowTrialConfig
.
ps
.
image
,
'
run_ps.sh
'
,
nonWorkerPodResources
);
tensorflowTrialConfig
.
ps
.
image
,
'
run_ps.sh
'
,
nonWorkerPodResources
,
privateRegistrySecretName
);
}
replicaSpecsObjMap
.
set
(
this
.
kubernetesCRDClient
.
jobKind
,
{
tfReplicaSpecs
:
replicaSpecsObj
});
}
else
if
(
this
.
kubeflowTrialConfig
.
operatorType
===
'
pytorch-operator
'
)
{
const
pytorchTrialConfig
:
KubeflowTrialConfigPytorch
=
<
KubeflowTrialConfigPytorch
>
this
.
kubeflowTrialConfig
;
if
(
pytorchTrialConfig
.
worker
!==
undefined
)
{
let
privateRegistrySecretName
:
string
|
undefined
=
await
this
.
createRegistrySecret
(
pytorchTrialConfig
.
worker
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Worker
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
pytorchTrialConfig
.
worker
.
replicas
,
pytorchTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
);
pytorchTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
,
privateRegistrySecretName
);
}
let
privateRegistrySecretName
:
string
|
undefined
=
await
this
.
createRegistrySecret
(
pytorchTrialConfig
.
master
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Master
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
pytorchTrialConfig
.
master
.
replicas
,
pytorchTrialConfig
.
master
.
image
,
'
run_master.sh
'
,
nonWorkerPodResources
);
pytorchTrialConfig
.
master
.
image
,
'
run_master.sh
'
,
nonWorkerPodResources
,
privateRegistrySecretName
);
replicaSpecsObjMap
.
set
(
this
.
kubernetesCRDClient
.
jobKind
,
{
pytorchReplicaSpecs
:
replicaSpecsObj
});
}
return
{
return
Promise
.
resolve
(
{
apiVersion
:
`kubeflow.org/
${
this
.
kubernetesCRDClient
.
apiVersion
}
`
,
kind
:
this
.
kubernetesCRDClient
.
jobKind
,
metadata
:
{
...
...
@@ -412,7 +415,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
},
spec
:
replicaSpecsObjMap
.
get
(
this
.
kubernetesCRDClient
.
jobKind
)
};
}
)
;
}
/**
...
...
@@ -424,7 +427,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param podResources pod resource config section
*/
private
generateReplicaConfig
(
trialWorkingFolder
:
string
,
replicaNumber
:
number
,
replicaImage
:
string
,
runScriptFile
:
string
,
podResources
:
any
):
any
{
podResources
:
any
,
privateRegistrySecretName
:
string
|
undefined
):
any
{
if
(
this
.
kubeflowClusterConfig
===
undefined
)
{
throw
new
Error
(
'
Kubeflow Cluster config is not initialized
'
);
}
...
...
@@ -436,7 +439,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
if
(
this
.
kubernetesCRDClient
===
undefined
)
{
throw
new
Error
(
'
Kubeflow operator client is not initialized
'
);
}
// The config spec for volume field
const
volumeSpecMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
if
(
this
.
kubeflowClusterConfig
.
storageType
===
'
azureStorage
'
)
{
volumeSpecMap
.
set
(
'
nniVolumes
'
,
[
...
...
@@ -459,16 +462,9 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}]);
}
return
{
replicas
:
replicaNumber
,
template
:
{
metadata
:
{
// tslint:disable-next-line:no-null-keyword
creationTimestamp
:
null
},
spec
:
{
containers
:
[
// The config spec for container field
const
containersSpecMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
containersSpecMap
.
set
(
'
containers
'
,
[
{
// Kubeflow tensorflow operator requires that containers' name must be tensorflow
// TODO: change the name based on operator's type
...
...
@@ -481,12 +477,29 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
mountPath
:
this
.
CONTAINER_MOUNT_PATH
}],
resources
:
podResources
}],
}
]);
let
spec
:
any
=
{
containers
:
containersSpecMap
.
get
(
'
containers
'
),
restartPolicy
:
'
ExitCode
'
,
volumes
:
volumeSpecMap
.
get
(
'
nniVolumes
'
)
}
if
(
privateRegistrySecretName
)
{
spec
.
imagePullSecrets
=
[
{
name
:
privateRegistrySecretName
}]
}
return
{
replicas
:
replicaNumber
,
template
:
{
metadata
:
{
// tslint:disable-next-line:no-null-keyword
creationTimestamp
:
null
},
spec
:
spec
}
}
};
}
}
// tslint:enable: no-unsafe-any no-any
...
...
src/nni_manager/training_service/kubernetes/kubernetesConfig.ts
View file @
88ef6c04
...
...
@@ -179,6 +179,9 @@ export class KubernetesTrialConfigTemplate {
// Docker image
public
readonly
image
:
string
;
// Private registry config file path used to download the docker image
public
readonly
privateRegistryAuthPath
?:
string
;
// Trial command
public
readonly
command
:
string
;
...
...
@@ -186,12 +189,13 @@ export class KubernetesTrialConfigTemplate {
public
readonly
gpuNum
:
number
;
constructor
(
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
)
{
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
privateRegistryAuthPath
?:
string
)
{
this
.
command
=
command
;
this
.
gpuNum
=
gpuNum
;
this
.
cpuNum
=
cpuNum
;
this
.
memoryMB
=
memoryMB
;
this
.
image
=
image
;
this
.
privateRegistryAuthPath
=
privateRegistryAuthPath
;
}
}
...
...
src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
View file @
88ef6c04
...
...
@@ -38,6 +38,8 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
import
{
kubernetesScriptFormat
,
KubernetesTrialJobDetail
}
from
'
./kubernetesData
'
;
import
{
KubernetesJobRestServer
}
from
'
./kubernetesJobRestServer
'
;
var
fs
=
require
(
'
fs
'
);
/**
* Training Service implementation for Kubernetes
*/
...
...
@@ -327,5 +329,34 @@ abstract class KubernetesTrainingService {
return
Promise
.
resolve
();
}
/**
 * Create a docker-registry pull secret from a local auth file.
 *
 * The file content is base64-encoded and stored under the
 * '.dockerconfigjson' key of a Secret of type
 * 'kubernetes.io/dockerconfigjson' in the 'default' namespace.
 *
 * @param filePath path of the registry auth file; undefined or '' means no secret is needed
 * @returns the generated secret name, or undefined when no file path was given
 */
protected async createRegistrySecret(filePath: string | undefined): Promise<string | undefined> {
    // Nothing to create when no auth file was configured.
    if (!(filePath !== undefined && filePath !== '')) {
        return undefined;
    }

    const encodedAuth = fs.readFileSync(filePath).toString('base64');
    const secretName = String.Format('nni-secret-{0}', uniqueString(8).toLowerCase());

    // Labels mirror the ones used for trial resources so the secret can be found per experiment.
    const secretMetadata = {
        name: secretName,
        namespace: 'default',
        labels: {
            app: this.NNI_KUBERNETES_TRIAL_LABEL,
            expId: getExperimentId()
        }
    };
    const secretSpec = {
        apiVersion: 'v1',
        kind: 'Secret',
        metadata: secretMetadata,
        type: 'kubernetes.io/dockerconfigjson',
        data: {
            '.dockerconfigjson': encodedAuth
        }
    };
    await this.genericK8sClient.createSecret(secretSpec);

    return secretName;
}
}
export
{
KubernetesTrainingService
};
src/nni_manager/training_service/pai/paiConfig.ts
View file @
88ef6c04
...
...
@@ -71,6 +71,8 @@ export class PAIJobConfig {
public
readonly
image
:
string
;
// Code directory on HDFS
public
readonly
codeDir
:
string
;
//authentication file used for private Docker registry
public
readonly
authFile
?:
string
;
// List of taskRole, one task role at least
public
taskRoles
:
PAITaskRole
[];
...
...
@@ -87,12 +89,13 @@ export class PAIJobConfig {
* @param taskRoles List of taskRole, one task role at least
*/
constructor
(
jobName
:
string
,
image
:
string
,
codeDir
:
string
,
taskRoles
:
PAITaskRole
[],
virtualCluster
:
string
)
{
taskRoles
:
PAITaskRole
[],
virtualCluster
:
string
,
authFile
?:
string
)
{
this
.
jobName
=
jobName
;
this
.
image
=
image
;
this
.
codeDir
=
codeDir
;
this
.
taskRoles
=
taskRoles
;
this
.
virtualCluster
=
virtualCluster
;
this
.
authFile
=
authFile
;
}
}
...
...
@@ -129,14 +132,17 @@ export class NNIPAITrialConfig extends TrialConfig {
public
virtualCluster
?:
string
;
//Shared memory for one task in the task role
public
shmMB
?:
number
;
//authentication file used for private Docker registry
public
authFile
?:
string
;
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
virtualCluster
?:
string
,
shmMB
?:
number
)
{
image
:
string
,
virtualCluster
?:
string
,
shmMB
?:
number
,
authFile
?:
string
)
{
super
(
command
,
codeDir
,
gpuNum
);
this
.
cpuNum
=
cpuNum
;
this
.
memoryMB
=
memoryMB
;
this
.
image
=
image
;
this
.
virtualCluster
=
virtualCluster
;
this
.
shmMB
=
shmMB
;
this
.
authFile
=
authFile
;
}
}
src/nni_manager/training_service/pai/paiTrainingService.ts
View file @
88ef6c04
...
...
@@ -442,7 +442,7 @@ class PAITrainingService implements TrainingService {
// Task command
nniPaiTrialCommand
,
// Task shared memory
this
.
paiTrialConfig
.
shmMB
this
.
paiTrialConfig
.
shmMB
,
)
];
...
...
@@ -456,7 +456,9 @@ class PAITrainingService implements TrainingService {
// PAI Task roles
paiTaskRoles
,
// Add Virtual Cluster
this
.
paiTrialConfig
.
virtualCluster
===
undefined
?
'
default
'
:
this
.
paiTrialConfig
.
virtualCluster
.
toString
()
this
.
paiTrialConfig
.
virtualCluster
===
undefined
?
'
default
'
:
this
.
paiTrialConfig
.
virtualCluster
.
toString
(),
//Task auth File
this
.
paiTrialConfig
.
authFile
);
// Step 2. Upload code files in codeDir onto HDFS
...
...
src/sdk/pycli/nnicli/__init__.py
0 → 100644
View file @
88ef6c04
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
from
.nni_client
import
*
src/sdk/pycli/nnicli/nni_client.py
0 → 100644
View file @
88ef6c04
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
""" A python wrapper for nni rest api
Example:
import nnicli as nc
nc.start_nni('../../../../examples/trials/mnist/config.yml')
nc.set_endpoint('http://localhost:8080')
print(nc.version())
print(nc.get_experiment_status())
print(nc.get_job_statistics())
print(nc.list_trial_jobs())
nc.stop_nni()
"""
import
sys
import
os
import
subprocess
import
requests
# Public names of this module (the package __init__ star-imports from it).
__all__ = [
    'start_nni',
    'stop_nni',
    'set_endpoint',
    'version',
    'get_experiment_status',
    'get_experiment_profile',
    'get_trial_job',
    'list_trial_jobs',
    'get_job_statistics',
    'get_job_metrics',
    'export_data'
]

# REST resource paths; each one is appended after API_ROOT_PATH when a request URI is built.
EXPERIMENT_PATH = 'experiment'
VERSION_PATH = 'version'
STATUS_PATH = 'check-status'
JOB_STATISTICS_PATH = 'job-statistics'
TRIAL_JOBS_PATH = 'trial-jobs'
METRICS_PATH = 'metric-data'
EXPORT_DATA_PATH = 'export-data'

# Path prefix placed between the endpoint and the resource path.
API_ROOT_PATH = 'api/v1/nni'

# Endpoint of the nni rest server; stays None until set_endpoint() assigns it.
_api_endpoint = None
def set_endpoint(endpoint):
    """Set the endpoint of the nni rest server used by nnicli.

    Parameters
    ----------
    endpoint : str
        Base URL of the rest server, for example ``http://localhost:8080``.
        Trailing slashes are dropped so that request URIs built as
        ``<endpoint>/<root>/<path>`` never contain an empty ``//`` segment.
        Non-string values (e.g. ``None`` to reset) are stored unchanged.
    """
    global _api_endpoint
    if isinstance(endpoint, str):
        endpoint = endpoint.rstrip('/')
    _api_endpoint = endpoint
def _check_endpoint():
    """Raise AssertionError unless an endpoint has been configured."""
    if _api_endpoint is not None:
        return
    raise AssertionError("Please call set_endpoint to specify nni endpoint")
def _nni_rest_get(api_path, response_type='json'):
    """Issue a GET request against the configured nni rest server.

    Parameters
    ----------
    api_path : str
        Resource path appended after the endpoint and API_ROOT_PATH.
    response_type : str
        'json' to return the decoded JSON body, 'text' to return the raw text.

    Returns
    -------
    The decoded body on a 2xx response, otherwise None.

    Raises
    ------
    AssertionError
        If no endpoint is configured, or if a 2xx response arrives and
        response_type is neither 'json' nor 'text'.
    """
    _check_endpoint()
    response = requests.get('{}/{}/{}'.format(_api_endpoint, API_ROOT_PATH, api_path))
    # Non-2xx responses are reported to the caller as None.
    if not _http_succeed(response.status_code):
        return None
    if response_type == 'json':
        return response.json()
    if response_type == 'text':
        return response.text
    raise AssertionError('Incorrect response_type')
def _http_succeed(status_code):
    """Return True when status_code is in the 2xx (success) range."""
    return 200 <= status_code < 300
def _create_process(cmd):
    """Run cmd as a child process, echo its stdout, and return its exit code.

    Parameters
    ----------
    cmd : list
        Command and arguments passed to subprocess.Popen.

    Returns
    -------
    int
        The return code of the child process.
    """
    popen_options = {'stdout': subprocess.PIPE}
    if sys.platform == 'win32':
        # On Windows the child is started in a new process group.
        popen_options['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
    child = subprocess.Popen(cmd, **popen_options)

    # Echo output line by line for as long as the child has not exited.
    while True:
        if child.poll() is not None:
            break
        line = child.stdout.readline()
        if not line:
            continue
        print(line.decode('utf-8').strip())
    return child.returncode
def start_nni(config_file):
    """Start an nni experiment with the specified configuration file.

    Parameters
    ----------
    config_file : str
        Path of the experiment configuration file handed to
        ``nnictl create --config``.

    Raises
    ------
    RuntimeError
        If the nnictl command exits with a non-zero return code.
    """
    # Build the argument list directly: formatting a string and splitting it on
    # spaces would break a config path that itself contains spaces.
    cmd = ['nnictl', 'create', '--config', str(config_file)]
    if _create_process(cmd) != 0:
        raise RuntimeError('Failed to start nni.')
def stop_nni():
    """Stop the nni experiment by running ``nnictl stop``.

    Raises RuntimeError when the command exits with a non-zero return code.
    """
    exit_code = _create_process(['nnictl', 'stop'])
    if exit_code != 0:
        raise RuntimeError('Failed to stop nni.')
def version():
    """Return the nni version as reported by the rest server (text response)."""
    nni_version = _nni_rest_get(VERSION_PATH, 'text')
    return nni_version
def get_experiment_status():
    """Return the experiment status as a dict."""
    status = _nni_rest_get(STATUS_PATH)
    return status
def get_experiment_profile():
    """Return the experiment profile as a dict."""
    profile = _nni_rest_get(EXPERIMENT_PATH)
    return profile
def get_trial_job(trial_job_id):
    """Return trial job information as a dict.

    Parameters
    ----------
    trial_job_id : str
        Id of the trial job to query; must not be None.

    Raises
    ------
    AssertionError
        If trial_job_id is None.
    """
    # Raise explicitly so the check is not stripped when Python runs with -O.
    if trial_job_id is None:
        raise AssertionError('trial_job_id must not be None')
    # URL segments are always joined with '/'; os.path.join would insert a
    # backslash on Windows.
    return _nni_rest_get('{}/{}'.format(TRIAL_JOBS_PATH, trial_job_id))
def list_trial_jobs():
    """Return information for all trial jobs as a list."""
    trial_jobs = _nni_rest_get(TRIAL_JOBS_PATH)
    return trial_jobs
def get_job_statistics():
    """Return trial job statistics information as a dict."""
    statistics = _nni_rest_get(JOB_STATISTICS_PATH)
    return statistics
def get_job_metrics(trial_job_id=None):
    """Return trial job metrics.

    Parameters
    ----------
    trial_job_id : str, optional
        When given, only the metrics of that trial job are requested;
        when None, the metrics path is queried without a job id.
    """
    if trial_job_id is None:
        return _nni_rest_get(METRICS_PATH)
    # URL segments are always joined with '/'; os.path.join would insert a
    # backslash on Windows.
    return _nni_rest_get('{}/{}'.format(METRICS_PATH, trial_job_id))
def export_data():
    """Return exported information for all trial jobs."""
    exported = _nni_rest_get(EXPORT_DATA_PATH)
    return exported
src/sdk/pycli/setup.py
0 → 100644
View file @
88ef6c04
import
setuptools
# Packaging definition for the nnicli distribution.
setuptools.setup(
    # Identity
    name='nnicli',
    version='999.0.0-developing',
    description='nnicli for Neural Network Intelligence project',
    url='https://github.com/Microsoft/nni',
    license='MIT',

    # Maintainers
    author='Microsoft NNI Team',
    author_email='nni@microsoft.com',

    # Contents and requirements
    packages=setuptools.find_packages(),
    python_requires='>=3.5',
    install_requires=['requests'],
)
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
View file @
88ef6c04
...
...
@@ -190,13 +190,19 @@ class HyperoptTuner(Tuner):
HyperoptTuner is a tuner which using hyperopt algorithm.
"""
def
__init__
(
self
,
algorithm_name
,
optimize_mode
=
'minimize'
):
def
__init__
(
self
,
algorithm_name
,
optimize_mode
=
'minimize'
,
parallel_optimize
=
False
,
constant_liar_type
=
'min'
):
"""
Parameters
----------
algorithm_name : str
algorithm_name includes "tpe", "random_search" and anneal".
optimize_mode : str
parallel_optimize : bool
More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
constant_liar_type : str
constant_liar_type including "min", "max" and "mean"
More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
"""
self
.
algorithm_name
=
algorithm_name
self
.
optimize_mode
=
OptimizeMode
(
optimize_mode
)
...
...
@@ -205,6 +211,13 @@ class HyperoptTuner(Tuner):
self
.
rval
=
None
self
.
supplement_data_num
=
0
self
.
parallel
=
parallel_optimize
if
self
.
parallel
:
self
.
CL_rval
=
None
self
.
constant_liar_type
=
constant_liar_type
self
.
running_data
=
[]
self
.
optimal_y
=
None
def
_choose_tuner
(
self
,
algorithm_name
):
"""
Parameters
...
...
@@ -266,6 +279,10 @@ class HyperoptTuner(Tuner):
# but it can rarely cause duplicate parameters
total_params
=
self
.
get_suggestion
(
random_search
=
True
)
self
.
total_data
[
parameter_id
]
=
total_params
if
self
.
parallel
:
self
.
running_data
.
append
(
parameter_id
)
params
=
split_index
(
total_params
)
return
params
...
...
@@ -287,10 +304,39 @@ class HyperoptTuner(Tuner):
raise
RuntimeError
(
'Received parameter_id not in total_data.'
)
params
=
self
.
total_data
[
parameter_id
]
# code for parallel
if
self
.
parallel
:
constant_liar
=
kwargs
.
get
(
'constant_liar'
,
False
)
if
constant_liar
:
rval
=
self
.
CL_rval
else
:
rval
=
self
.
rval
self
.
running_data
.
remove
(
parameter_id
)
# update the reward of optimal_y
if
self
.
optimal_y
is
None
:
if
self
.
constant_liar_type
==
'mean'
:
self
.
optimal_y
=
[
reward
,
1
]
else
:
self
.
optimal_y
=
reward
else
:
if
self
.
constant_liar_type
==
'mean'
:
_sum
=
self
.
optimal_y
[
0
]
+
reward
_number
=
self
.
optimal_y
[
1
]
+
1
self
.
optimal_y
=
[
_sum
,
_number
]
elif
self
.
constant_liar_type
==
'min'
:
self
.
optimal_y
=
min
(
self
.
optimal_y
,
reward
)
elif
self
.
constant_liar_type
==
'max'
:
self
.
optimal_y
=
max
(
self
.
optimal_y
,
reward
)
logger
.
debug
(
"Update optimal_y with reward, optimal_y = %s"
,
self
.
optimal_y
)
else
:
rval
=
self
.
rval
if
self
.
optimize_mode
is
OptimizeMode
.
Maximize
:
reward
=
-
reward
rval
=
self
.
rval
domain
=
rval
.
domain
trials
=
rval
.
trials
...
...
@@ -375,13 +421,26 @@ class HyperoptTuner(Tuner):
total_params : dict
parameter suggestion
"""
if
self
.
parallel
and
len
(
self
.
total_data
)
>
20
and
len
(
self
.
running_data
)
and
self
.
optimal_y
is
not
None
:
self
.
CL_rval
=
copy
.
deepcopy
(
self
.
rval
)
if
self
.
constant_liar_type
==
'mean'
:
_constant_liar_y
=
self
.
optimal_y
[
0
]
/
self
.
optimal_y
[
1
]
else
:
_constant_liar_y
=
self
.
optimal_y
for
_parameter_id
in
self
.
running_data
:
self
.
receive_trial_result
(
parameter_id
=
_parameter_id
,
parameters
=
None
,
value
=
_constant_liar_y
,
constant_liar
=
True
)
rval
=
self
.
CL_rval
random_state
=
np
.
random
.
randint
(
2
**
31
-
1
)
else
:
rval
=
self
.
rval
random_state
=
rval
.
rstate
.
randint
(
2
**
31
-
1
)
trials
=
rval
.
trials
algorithm
=
rval
.
algo
new_ids
=
rval
.
trials
.
new_trial_ids
(
1
)
rval
.
trials
.
refresh
()
random_state
=
rval
.
rstate
.
randint
(
2
**
31
-
1
)
if
random_search
:
new_trials
=
hp
.
rand
.
suggest
(
new_ids
,
rval
.
domain
,
trials
,
random_state
)
...
...
test/cli_test.py
0 → 100644
View file @
88ef6c04
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import
sys
import
time
import
traceback
from
utils
import
GREEN
,
RED
,
CLEAR
,
setup_experiment
def test_nni_cli():
    """Start an experiment through nnicli, query the rest api, then stop it.

    Any exception raised along the way is reported, its traceback printed,
    and then re-raised; the experiment is stopped in every case.
    """
    import nnicli as nc

    config_file = 'config_test/examples/mnist.test.yml'

    try:
        # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict
        time.sleep(6)
        banner = GREEN + 'Testing nnicli:' + config_file + CLEAR
        print(banner)

        nc.start_nni(config_file)
        time.sleep(3)
        nc.set_endpoint('http://localhost:8080')

        print(nc.version())
        print(nc.get_job_statistics())
        print(nc.get_experiment_status())
        nc.list_trial_jobs()

        passed = 'Test nnicli {}: TEST PASS'.format(config_file)
        print(GREEN + passed + CLEAR)
    except Exception as error:
        failed = 'Test nnicli {}: TEST FAIL'.format(config_file)
        print(RED + failed + CLEAR)
        print('%r' % error)
        traceback.print_exc()
        raise error
    finally:
        nc.stop_nni()
# Script entry point: prepare the experiment environment, then run the nnicli test.
if __name__ == '__main__':
    # The environment counts as installed unless the last argument is --preinstall.
    installed = not (sys.argv[-1] == '--preinstall')
    setup_experiment(installed)
    test_nni_cli()
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment