Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
88ef6c04
Unverified
Commit
88ef6c04
authored
Aug 01, 2019
by
SparkSnail
Committed by
GitHub
Aug 01, 2019
Browse files
Merge pull request #197 from microsoft/master
merge master
parents
5f3c5ffd
555334de
Changes
48
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
951 additions
and
61 deletions
+951
-61
docs/img/parallel_tpe_search_result.PNG
docs/img/parallel_tpe_search_result.PNG
+0
-0
docs/img/parallel_tpe_search_tpe.PNG
docs/img/parallel_tpe_search_tpe.PNG
+0
-0
examples/notebooks/retrieve_nni_info_with_python.ipynb
examples/notebooks/retrieve_nni_info_with_python.ipynb
+497
-0
examples/trials/auto-feature-engineering/README.md
examples/trials/auto-feature-engineering/README.md
+8
-0
examples/tuners/random_nas_tuner/random_nas_tuner.py
examples/tuners/random_nas_tuner/random_nas_tuner.py
+3
-1
setup.py
setup.py
+2
-1
src/nni_manager/rest_server/restValidationSchemas.ts
src/nni_manager/rest_server/restValidationSchemas.ts
+5
-0
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts
...bernetes/frameworkcontroller/frameworkcontrollerConfig.ts
+2
-2
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
...frameworkcontroller/frameworkcontrollerTrainingService.ts
+25
-14
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts
...er/training_service/kubernetes/kubeflow/kubeflowConfig.ts
+2
-2
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts
...ng_service/kubernetes/kubeflow/kubeflowTrainingService.ts
+45
-32
src/nni_manager/training_service/kubernetes/kubernetesConfig.ts
...i_manager/training_service/kubernetes/kubernetesConfig.ts
+5
-1
src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
.../training_service/kubernetes/kubernetesTrainingService.ts
+31
-0
src/nni_manager/training_service/pai/paiConfig.ts
src/nni_manager/training_service/pai/paiConfig.ts
+8
-2
src/nni_manager/training_service/pai/paiTrainingService.ts
src/nni_manager/training_service/pai/paiTrainingService.ts
+4
-2
src/sdk/pycli/nnicli/__init__.py
src/sdk/pycli/nnicli/__init__.py
+21
-0
src/sdk/pycli/nnicli/nni_client.py
src/sdk/pycli/nnicli/nni_client.py
+156
-0
src/sdk/pycli/setup.py
src/sdk/pycli/setup.py
+18
-0
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
+63
-4
test/cli_test.py
test/cli_test.py
+56
-0
No files found.
docs/img/parallel_tpe_search_result.PNG
0 → 100644
View file @
88ef6c04
363 KB
docs/img/parallel_tpe_search_tpe.PNG
0 → 100644
View file @
88ef6c04
9.5 KB
examples/notebooks/retrieve_nni_info_with_python.ipynb
0 → 100644
View file @
88ef6c04
This diff is collapsed.
Click to expand it.
examples/trials/auto-feature-engineering/README.md
0 → 100644
View file @
88ef6c04
**Automatic Feature Engineering in nni**
===
Now we have an
[
example
](
https://github.com/SpongebBob/tabular_automl_NNI
)
, which can automatically do feature engineering in nni.
This code comes from our contributors. Many thanks to our lovely contributors!
We welcome more and more people to join us!
examples/tuners/random_nas_tuner/random_nas_tuner.py
View file @
88ef6c04
...
@@ -7,7 +7,9 @@ def random_archi_generator(nas_ss, random_state):
...
@@ -7,7 +7,9 @@ def random_archi_generator(nas_ss, random_state):
'''
'''
chosen_archi
=
{}
chosen_archi
=
{}
print
(
"zql: nas search space: "
,
nas_ss
)
print
(
"zql: nas search space: "
,
nas_ss
)
for
block_name
,
block
in
nas_ss
.
items
():
for
block_name
,
block_value
in
nas_ss
.
items
():
assert
block_value
[
'_type'
]
==
"mutable_layer"
,
"Random NAS Tuner only receives NAS search space whose _type is 'mutable_layer'"
block
=
block_value
[
'_value'
]
tmp_block
=
{}
tmp_block
=
{}
for
layer_name
,
layer
in
block
.
items
():
for
layer_name
,
layer
in
block
.
items
():
tmp_layer
=
{}
tmp_layer
=
{}
...
...
setup.py
View file @
88ef6c04
...
@@ -35,9 +35,10 @@ setup(
...
@@ -35,9 +35,10 @@ setup(
license
=
'MIT'
,
license
=
'MIT'
,
url
=
'https://github.com/Microsoft/nni'
,
url
=
'https://github.com/Microsoft/nni'
,
packages
=
find_packages
(
'src/sdk/pynni'
,
exclude
=
[
'tests'
])
+
find_packages
(
'tools'
),
packages
=
find_packages
(
'src/sdk/pynni'
,
exclude
=
[
'tests'
])
+
find_packages
(
'src/sdk/pycli'
)
+
find_packages
(
'tools'
),
package_dir
=
{
package_dir
=
{
'nni'
:
'src/sdk/pynni/nni'
,
'nni'
:
'src/sdk/pynni/nni'
,
'nnicli'
:
'src/sdk/pycli/nnicli'
,
'nni_annotation'
:
'tools/nni_annotation'
,
'nni_annotation'
:
'tools/nni_annotation'
,
'nni_cmd'
:
'tools/nni_cmd'
,
'nni_cmd'
:
'tools/nni_cmd'
,
'nni_trial_tool'
:
'tools/nni_trial_tool'
,
'nni_trial_tool'
:
'tools/nni_trial_tool'
,
...
...
src/nni_manager/rest_server/restValidationSchemas.ts
View file @
88ef6c04
...
@@ -51,10 +51,12 @@ export namespace ValidationSchemas {
...
@@ -51,10 +51,12 @@ export namespace ValidationSchemas {
command
:
joi
.
string
().
min
(
1
),
command
:
joi
.
string
().
min
(
1
),
virtualCluster
:
joi
.
string
(),
virtualCluster
:
joi
.
string
(),
shmMB
:
joi
.
number
(),
shmMB
:
joi
.
number
(),
authFile
:
joi
.
string
(),
nasMode
:
joi
.
string
().
valid
(
'
classic_mode
'
,
'
enas_mode
'
,
'
oneshot_mode
'
),
nasMode
:
joi
.
string
().
valid
(
'
classic_mode
'
,
'
enas_mode
'
,
'
oneshot_mode
'
),
worker
:
joi
.
object
({
worker
:
joi
.
object
({
replicas
:
joi
.
number
().
min
(
1
).
required
(),
replicas
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
@@ -64,6 +66,7 @@ export namespace ValidationSchemas {
...
@@ -64,6 +66,7 @@ export namespace ValidationSchemas {
ps
:
joi
.
object
({
ps
:
joi
.
object
({
replicas
:
joi
.
number
().
min
(
1
).
required
(),
replicas
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
@@ -73,6 +76,7 @@ export namespace ValidationSchemas {
...
@@ -73,6 +76,7 @@ export namespace ValidationSchemas {
master
:
joi
.
object
({
master
:
joi
.
object
({
replicas
:
joi
.
number
().
min
(
1
).
required
(),
replicas
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
@@ -83,6 +87,7 @@ export namespace ValidationSchemas {
...
@@ -83,6 +87,7 @@ export namespace ValidationSchemas {
name
:
joi
.
string
().
min
(
1
),
name
:
joi
.
string
().
min
(
1
),
taskNum
:
joi
.
number
().
min
(
1
).
required
(),
taskNum
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
image
:
joi
.
string
().
min
(
1
),
privateRegistryAuthPath
:
joi
.
string
().
min
(
1
),
outputDir
:
joi
.
string
(),
outputDir
:
joi
.
string
(),
cpuNum
:
joi
.
number
().
min
(
1
),
cpuNum
:
joi
.
number
().
min
(
1
),
memoryMB
:
joi
.
number
().
min
(
100
),
memoryMB
:
joi
.
number
().
min
(
100
),
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerConfig.ts
View file @
88ef6c04
...
@@ -43,8 +43,8 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
...
@@ -43,8 +43,8 @@ export class FrameworkControllerTrialConfigTemplate extends KubernetesTrialConfi
public
readonly
taskNum
:
number
;
public
readonly
taskNum
:
number
;
constructor
(
taskNum
:
number
,
command
:
string
,
gpuNum
:
number
,
constructor
(
taskNum
:
number
,
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
frameworkAttemptCompletionPolicy
:
FrameworkAttemptCompletionPolicy
)
{
frameworkAttemptCompletionPolicy
:
FrameworkAttemptCompletionPolicy
,
privateRegistryFilePath
?:
string
|
undefined
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
);
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
,
privateRegistryFilePath
);
this
.
frameworkAttemptCompletionPolicy
=
frameworkAttemptCompletionPolicy
;
this
.
frameworkAttemptCompletionPolicy
=
frameworkAttemptCompletionPolicy
;
this
.
name
=
name
;
this
.
name
=
name
;
this
.
taskNum
=
taskNum
;
this
.
taskNum
=
taskNum
;
...
...
src/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
View file @
88ef6c04
...
@@ -305,7 +305,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
...
@@ -305,7 +305,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
}
// Generate frameworkcontroller job resource config object
// Generate frameworkcontroller job resource config object
const
frameworkcontrollerJobConfig
:
any
=
const
frameworkcontrollerJobConfig
:
any
=
this
.
generateFrameworkControllerJobConfig
(
trialJobId
,
trialWorkingFolder
,
frameworkcontrollerJobName
,
podResources
);
await
this
.
generateFrameworkControllerJobConfig
(
trialJobId
,
trialWorkingFolder
,
frameworkcontrollerJobName
,
podResources
);
return
Promise
.
resolve
(
frameworkcontrollerJobConfig
);
return
Promise
.
resolve
(
frameworkcontrollerJobConfig
);
}
}
...
@@ -329,8 +329,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
...
@@ -329,8 +329,8 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
* @param frameworkcontrollerJobName job name
* @param frameworkcontrollerJobName job name
* @param podResources pod template
* @param podResources pod template
*/
*/
private
generateFrameworkControllerJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
private
async
generateFrameworkControllerJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
frameworkcontrollerJobName
:
string
,
podResources
:
any
)
:
any
{
frameworkcontrollerJobName
:
string
,
podResources
:
any
)
:
Promise
<
any
>
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
}
}
...
@@ -345,12 +345,14 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
...
@@ -345,12 +345,14 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
if
(
containerPort
===
undefined
)
{
if
(
containerPort
===
undefined
)
{
throw
new
Error
(
'
Container port is not initialized
'
);
throw
new
Error
(
'
Container port is not initialized
'
);
}
}
const
taskRole
:
any
=
this
.
generateTaskRoleConfig
(
const
taskRole
:
any
=
this
.
generateTaskRoleConfig
(
trialWorkingFolder
,
trialWorkingFolder
,
this
.
fcTrialConfig
.
taskRoles
[
index
].
image
,
this
.
fcTrialConfig
.
taskRoles
[
index
].
image
,
`run_
${
this
.
fcTrialConfig
.
taskRoles
[
index
].
name
}
.sh`
,
`run_
${
this
.
fcTrialConfig
.
taskRoles
[
index
].
name
}
.sh`
,
podResources
[
index
],
podResources
[
index
],
containerPort
containerPort
,
await
this
.
createRegistrySecret
(
this
.
fcTrialConfig
.
taskRoles
[
index
].
privateRegistryAuthPath
)
);
);
taskRoles
.
push
({
taskRoles
.
push
({
name
:
this
.
fcTrialConfig
.
taskRoles
[
index
].
name
,
name
:
this
.
fcTrialConfig
.
taskRoles
[
index
].
name
,
...
@@ -363,7 +365,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
...
@@ -363,7 +365,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
});
});
}
}
return
{
return
Promise
.
resolve
(
{
apiVersion
:
`frameworkcontroller.microsoft.com/v1`
,
apiVersion
:
`frameworkcontroller.microsoft.com/v1`
,
kind
:
'
Framework
'
,
kind
:
'
Framework
'
,
metadata
:
{
metadata
:
{
...
@@ -379,11 +381,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
...
@@ -379,11 +381,11 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
executionType
:
'
Start
'
,
executionType
:
'
Start
'
,
taskRoles
:
taskRoles
taskRoles
:
taskRoles
}
}
};
}
)
;
}
}
private
generateTaskRoleConfig
(
trialWorkingFolder
:
string
,
replicaImage
:
string
,
runScriptFile
:
string
,
private
generateTaskRoleConfig
(
trialWorkingFolder
:
string
,
replicaImage
:
string
,
runScriptFile
:
string
,
podResources
:
any
,
containerPort
:
number
):
any
{
podResources
:
any
,
containerPort
:
number
,
privateRegistrySecretName
:
string
|
undefined
):
any
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
if
(
this
.
fcClusterConfig
===
undefined
)
{
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
throw
new
Error
(
'
frameworkcontroller Cluster config is not initialized
'
);
}
}
...
@@ -451,13 +453,22 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
...
@@ -451,13 +453,22 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
mountPath
:
'
/mnt/frameworkbarrier
'
mountPath
:
'
/mnt/frameworkbarrier
'
}]
}]
}];
}];
const
spec
:
any
=
{
containers
:
containers
,
let
spec
:
any
=
{
initContainers
:
initContainers
,
containers
:
containers
,
restartPolicy
:
'
OnFailure
'
,
initContainers
:
initContainers
,
volumes
:
volumeSpecMap
.
get
(
'
nniVolumes
'
),
restartPolicy
:
'
OnFailure
'
,
hostNetwork
:
false
volumes
:
volumeSpecMap
.
get
(
'
nniVolumes
'
),
hostNetwork
:
false
};
};
if
(
privateRegistrySecretName
)
{
spec
.
imagePullSecrets
=
[
{
name
:
privateRegistrySecretName
}
]
}
if
(
this
.
fcClusterConfig
.
serviceAccountName
!==
undefined
)
{
if
(
this
.
fcClusterConfig
.
serviceAccountName
!==
undefined
)
{
spec
.
serviceAccountName
=
this
.
fcClusterConfig
.
serviceAccountName
;
spec
.
serviceAccountName
=
this
.
fcClusterConfig
.
serviceAccountName
;
}
}
...
...
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowConfig.ts
View file @
88ef6c04
...
@@ -135,8 +135,8 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
...
@@ -135,8 +135,8 @@ export class KubeflowTrialConfig extends KubernetesTrialConfig {
export
class
KubeflowTrialConfigTemplate
extends
KubernetesTrialConfigTemplate
{
export
class
KubeflowTrialConfigTemplate
extends
KubernetesTrialConfigTemplate
{
public
readonly
replicas
:
number
;
public
readonly
replicas
:
number
;
constructor
(
replicas
:
number
,
command
:
string
,
gpuNum
:
number
,
constructor
(
replicas
:
number
,
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
)
{
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
privateRegistryAuthPath
?:
string
)
{
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
);
super
(
command
,
gpuNum
,
cpuNum
,
memoryMB
,
image
,
privateRegistryAuthPath
);
this
.
replicas
=
replicas
;
this
.
replicas
=
replicas
;
}
}
}
}
...
...
src/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts
View file @
88ef6c04
...
@@ -347,7 +347,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -347,7 +347,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}
// Generate kubeflow job resource config object
// Generate kubeflow job resource config object
const
kubeflowJobConfig
:
any
=
this
.
generateKubeflowJobConfig
(
trialJobId
,
trialWorkingFolder
,
kubeflowJobName
,
workerPodResources
,
const
kubeflowJobConfig
:
any
=
await
this
.
generateKubeflowJobConfig
(
trialJobId
,
trialWorkingFolder
,
kubeflowJobName
,
workerPodResources
,
nonWorkerResources
);
nonWorkerResources
);
return
Promise
.
resolve
(
kubeflowJobConfig
);
return
Promise
.
resolve
(
kubeflowJobConfig
);
...
@@ -361,8 +361,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -361,8 +361,8 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param workerPodResources worker pod template
* @param workerPodResources worker pod template
* @param nonWorkerPodResources non-worker pod template, like ps or master
* @param nonWorkerPodResources non-worker pod template, like ps or master
*/
*/
private
generateKubeflowJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
kubeflowJobName
:
string
,
workerPodResources
:
any
,
private
async
generateKubeflowJobConfig
(
trialJobId
:
string
,
trialWorkingFolder
:
string
,
kubeflowJobName
:
string
,
workerPodResources
:
any
,
nonWorkerPodResources
?:
any
)
:
any
{
nonWorkerPodResources
?:
any
)
:
Promise
<
any
>
{
if
(
this
.
kubeflowClusterConfig
===
undefined
)
{
if
(
this
.
kubeflowClusterConfig
===
undefined
)
{
throw
new
Error
(
'
Kubeflow Cluster config is not initialized
'
);
throw
new
Error
(
'
Kubeflow Cluster config is not initialized
'
);
}
}
...
@@ -377,29 +377,32 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -377,29 +377,32 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
const
replicaSpecsObj
:
any
=
{};
const
replicaSpecsObj
:
any
=
{};
const
replicaSpecsObjMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
const
replicaSpecsObjMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
if
(
this
.
kubeflowTrialConfig
.
operatorType
===
'
tf-operator
'
)
{
if
(
this
.
kubeflowTrialConfig
.
operatorType
===
'
tf-operator
'
)
{
const
tensorflowTrialConfig
:
KubeflowTrialConfigTensorflow
=
<
KubeflowTrialConfigTensorflow
>
this
.
kubeflowTrialConfig
;
const
tensorflowTrialConfig
:
KubeflowTrialConfigTensorflow
=
<
KubeflowTrialConfigTensorflow
>
this
.
kubeflowTrialConfig
;
let
privateRegistrySecretName
=
await
this
.
createRegistrySecret
(
tensorflowTrialConfig
.
worker
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Worker
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
tensorflowTrialConfig
.
worker
.
replicas
,
replicaSpecsObj
.
Worker
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
tensorflowTrialConfig
.
worker
.
replicas
,
tensorflowTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
);
tensorflowTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
,
privateRegistrySecretName
);
if
(
tensorflowTrialConfig
.
ps
!==
undefined
)
{
if
(
tensorflowTrialConfig
.
ps
!==
undefined
)
{
let
privateRegistrySecretName
:
string
|
undefined
=
await
this
.
createRegistrySecret
(
tensorflowTrialConfig
.
ps
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Ps
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
tensorflowTrialConfig
.
ps
.
replicas
,
replicaSpecsObj
.
Ps
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
tensorflowTrialConfig
.
ps
.
replicas
,
tensorflowTrialConfig
.
ps
.
image
,
'
run_ps.sh
'
,
nonWorkerPodResources
);
tensorflowTrialConfig
.
ps
.
image
,
'
run_ps.sh
'
,
nonWorkerPodResources
,
privateRegistrySecretName
);
}
}
replicaSpecsObjMap
.
set
(
this
.
kubernetesCRDClient
.
jobKind
,
{
tfReplicaSpecs
:
replicaSpecsObj
});
replicaSpecsObjMap
.
set
(
this
.
kubernetesCRDClient
.
jobKind
,
{
tfReplicaSpecs
:
replicaSpecsObj
});
}
else
if
(
this
.
kubeflowTrialConfig
.
operatorType
===
'
pytorch-operator
'
)
{
}
else
if
(
this
.
kubeflowTrialConfig
.
operatorType
===
'
pytorch-operator
'
)
{
const
pytorchTrialConfig
:
KubeflowTrialConfigPytorch
=
<
KubeflowTrialConfigPytorch
>
this
.
kubeflowTrialConfig
;
const
pytorchTrialConfig
:
KubeflowTrialConfigPytorch
=
<
KubeflowTrialConfigPytorch
>
this
.
kubeflowTrialConfig
;
if
(
pytorchTrialConfig
.
worker
!==
undefined
)
{
if
(
pytorchTrialConfig
.
worker
!==
undefined
)
{
let
privateRegistrySecretName
:
string
|
undefined
=
await
this
.
createRegistrySecret
(
pytorchTrialConfig
.
worker
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Worker
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
pytorchTrialConfig
.
worker
.
replicas
,
replicaSpecsObj
.
Worker
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
pytorchTrialConfig
.
worker
.
replicas
,
pytorchTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
);
pytorchTrialConfig
.
worker
.
image
,
'
run_worker.sh
'
,
workerPodResources
,
privateRegistrySecretName
);
}
}
let
privateRegistrySecretName
:
string
|
undefined
=
await
this
.
createRegistrySecret
(
pytorchTrialConfig
.
master
.
privateRegistryAuthPath
);
replicaSpecsObj
.
Master
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
pytorchTrialConfig
.
master
.
replicas
,
replicaSpecsObj
.
Master
=
this
.
generateReplicaConfig
(
trialWorkingFolder
,
pytorchTrialConfig
.
master
.
replicas
,
pytorchTrialConfig
.
master
.
image
,
'
run_master.sh
'
,
nonWorkerPodResources
);
pytorchTrialConfig
.
master
.
image
,
'
run_master.sh
'
,
nonWorkerPodResources
,
privateRegistrySecretName
);
replicaSpecsObjMap
.
set
(
this
.
kubernetesCRDClient
.
jobKind
,
{
pytorchReplicaSpecs
:
replicaSpecsObj
});
replicaSpecsObjMap
.
set
(
this
.
kubernetesCRDClient
.
jobKind
,
{
pytorchReplicaSpecs
:
replicaSpecsObj
});
}
}
return
{
return
Promise
.
resolve
(
{
apiVersion
:
`kubeflow.org/
${
this
.
kubernetesCRDClient
.
apiVersion
}
`
,
apiVersion
:
`kubeflow.org/
${
this
.
kubernetesCRDClient
.
apiVersion
}
`
,
kind
:
this
.
kubernetesCRDClient
.
jobKind
,
kind
:
this
.
kubernetesCRDClient
.
jobKind
,
metadata
:
{
metadata
:
{
...
@@ -412,7 +415,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -412,7 +415,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}
},
},
spec
:
replicaSpecsObjMap
.
get
(
this
.
kubernetesCRDClient
.
jobKind
)
spec
:
replicaSpecsObjMap
.
get
(
this
.
kubernetesCRDClient
.
jobKind
)
};
}
)
;
}
}
/**
/**
...
@@ -424,7 +427,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -424,7 +427,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
* @param podResources pod resource config section
* @param podResources pod resource config section
*/
*/
private
generateReplicaConfig
(
trialWorkingFolder
:
string
,
replicaNumber
:
number
,
replicaImage
:
string
,
runScriptFile
:
string
,
private
generateReplicaConfig
(
trialWorkingFolder
:
string
,
replicaNumber
:
number
,
replicaImage
:
string
,
runScriptFile
:
string
,
podResources
:
any
):
any
{
podResources
:
any
,
privateRegistrySecretName
:
string
|
undefined
):
any
{
if
(
this
.
kubeflowClusterConfig
===
undefined
)
{
if
(
this
.
kubeflowClusterConfig
===
undefined
)
{
throw
new
Error
(
'
Kubeflow Cluster config is not initialized
'
);
throw
new
Error
(
'
Kubeflow Cluster config is not initialized
'
);
}
}
...
@@ -436,7 +439,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -436,7 +439,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
if
(
this
.
kubernetesCRDClient
===
undefined
)
{
if
(
this
.
kubernetesCRDClient
===
undefined
)
{
throw
new
Error
(
'
Kubeflow operator client is not initialized
'
);
throw
new
Error
(
'
Kubeflow operator client is not initialized
'
);
}
}
// The config spec for volume field
const
volumeSpecMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
const
volumeSpecMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
if
(
this
.
kubeflowClusterConfig
.
storageType
===
'
azureStorage
'
)
{
if
(
this
.
kubeflowClusterConfig
.
storageType
===
'
azureStorage
'
)
{
volumeSpecMap
.
set
(
'
nniVolumes
'
,
[
volumeSpecMap
.
set
(
'
nniVolumes
'
,
[
...
@@ -459,7 +462,34 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -459,7 +462,34 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
}
}
}]);
}]);
}
}
// The config spec for container field
const
containersSpecMap
:
Map
<
string
,
object
>
=
new
Map
<
string
,
object
>
();
containersSpecMap
.
set
(
'
containers
'
,
[
{
// Kubeflow tensorflow operator requires that containers' name must be tensorflow
// TODO: change the name based on operator's type
name
:
this
.
kubernetesCRDClient
.
containerName
,
image
:
replicaImage
,
args
:
[
'
sh
'
,
`
${
path
.
join
(
trialWorkingFolder
,
runScriptFile
)}
`
],
volumeMounts
:
[
{
name
:
'
nni-vol
'
,
mountPath
:
this
.
CONTAINER_MOUNT_PATH
}],
resources
:
podResources
}
]);
let
spec
:
any
=
{
containers
:
containersSpecMap
.
get
(
'
containers
'
),
restartPolicy
:
'
ExitCode
'
,
volumes
:
volumeSpecMap
.
get
(
'
nniVolumes
'
)
}
if
(
privateRegistrySecretName
)
{
spec
.
imagePullSecrets
=
[
{
name
:
privateRegistrySecretName
}]
}
return
{
return
{
replicas
:
replicaNumber
,
replicas
:
replicaNumber
,
template
:
{
template
:
{
...
@@ -467,26 +497,9 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
...
@@ -467,26 +497,9 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
// tslint:disable-next-line:no-null-keyword
// tslint:disable-next-line:no-null-keyword
creationTimestamp
:
null
creationTimestamp
:
null
},
},
spec
:
{
spec
:
spec
containers
:
[
{
// Kubeflow tensorflow operator requires that containers' name must be tensorflow
// TODO: change the name based on operator's type
name
:
this
.
kubernetesCRDClient
.
containerName
,
image
:
replicaImage
,
args
:
[
'
sh
'
,
`
${
path
.
join
(
trialWorkingFolder
,
runScriptFile
)}
`
],
volumeMounts
:
[
{
name
:
'
nni-vol
'
,
mountPath
:
this
.
CONTAINER_MOUNT_PATH
}],
resources
:
podResources
}],
restartPolicy
:
'
ExitCode
'
,
volumes
:
volumeSpecMap
.
get
(
'
nniVolumes
'
)
}
}
}
}
;
}
}
}
}
}
// tslint:enable: no-unsafe-any no-any
// tslint:enable: no-unsafe-any no-any
...
...
src/nni_manager/training_service/kubernetes/kubernetesConfig.ts
View file @
88ef6c04
...
@@ -179,6 +179,9 @@ export class KubernetesTrialConfigTemplate {
...
@@ -179,6 +179,9 @@ export class KubernetesTrialConfigTemplate {
// Docker image
// Docker image
public
readonly
image
:
string
;
public
readonly
image
:
string
;
// Private registry config file path to download docker image
public
readonly
privateRegistryAuthPath
?:
string
;
// Trial command
// Trial command
public
readonly
command
:
string
;
public
readonly
command
:
string
;
...
@@ -186,12 +189,13 @@ export class KubernetesTrialConfigTemplate {
...
@@ -186,12 +189,13 @@ export class KubernetesTrialConfigTemplate {
public
readonly
gpuNum
:
number
;
public
readonly
gpuNum
:
number
;
constructor
(
command
:
string
,
gpuNum
:
number
,
constructor
(
command
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
)
{
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
privateRegistryAuthPath
?:
string
)
{
this
.
command
=
command
;
this
.
command
=
command
;
this
.
gpuNum
=
gpuNum
;
this
.
gpuNum
=
gpuNum
;
this
.
cpuNum
=
cpuNum
;
this
.
cpuNum
=
cpuNum
;
this
.
memoryMB
=
memoryMB
;
this
.
memoryMB
=
memoryMB
;
this
.
image
=
image
;
this
.
image
=
image
;
this
.
privateRegistryAuthPath
=
privateRegistryAuthPath
;
}
}
}
}
...
...
src/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
View file @
88ef6c04
...
@@ -38,6 +38,8 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
...
@@ -38,6 +38,8 @@ import { KubernetesClusterConfig } from './kubernetesConfig';
import
{
kubernetesScriptFormat
,
KubernetesTrialJobDetail
}
from
'
./kubernetesData
'
;
import
{
kubernetesScriptFormat
,
KubernetesTrialJobDetail
}
from
'
./kubernetesData
'
;
import
{
KubernetesJobRestServer
}
from
'
./kubernetesJobRestServer
'
;
import
{
KubernetesJobRestServer
}
from
'
./kubernetesJobRestServer
'
;
var
fs
=
require
(
'
fs
'
);
/**
/**
* Training Service implementation for Kubernetes
* Training Service implementation for Kubernetes
*/
*/
...
@@ -327,5 +329,34 @@ abstract class KubernetesTrainingService {
...
@@ -327,5 +329,34 @@ abstract class KubernetesTrainingService {
return
Promise
.
resolve
();
return
Promise
.
resolve
();
}
}
protected
async
createRegistrySecret
(
filePath
:
string
|
undefined
):
Promise
<
string
|
undefined
>
{
if
(
filePath
===
undefined
||
filePath
===
''
)
{
return
undefined
;
}
let
body
=
fs
.
readFileSync
(
filePath
).
toString
(
'
base64
'
);
let
registrySecretName
=
String
.
Format
(
'
nni-secret-{0}
'
,
uniqueString
(
8
)
.
toLowerCase
());
await
this
.
genericK8sClient
.
createSecret
(
{
apiVersion
:
'
v1
'
,
kind
:
'
Secret
'
,
metadata
:
{
name
:
registrySecretName
,
namespace
:
'
default
'
,
labels
:
{
app
:
this
.
NNI_KUBERNETES_TRIAL_LABEL
,
expId
:
getExperimentId
()
}
},
type
:
'
kubernetes.io/dockerconfigjson
'
,
data
:
{
'
.dockerconfigjson
'
:
body
}
}
);
return
registrySecretName
;
}
}
}
export
{
KubernetesTrainingService
};
export
{
KubernetesTrainingService
};
src/nni_manager/training_service/pai/paiConfig.ts
View file @
88ef6c04
...
@@ -71,6 +71,8 @@ export class PAIJobConfig {
...
@@ -71,6 +71,8 @@ export class PAIJobConfig {
public
readonly
image
:
string
;
public
readonly
image
:
string
;
// Code directory on HDFS
// Code directory on HDFS
public
readonly
codeDir
:
string
;
public
readonly
codeDir
:
string
;
//authentication file used for private Docker registry
public
readonly
authFile
?:
string
;
// List of taskRole, one task role at least
// List of taskRole, one task role at least
public
taskRoles
:
PAITaskRole
[];
public
taskRoles
:
PAITaskRole
[];
...
@@ -87,12 +89,13 @@ export class PAIJobConfig {
...
@@ -87,12 +89,13 @@ export class PAIJobConfig {
* @param taskRoles List of taskRole, one task role at least
* @param taskRoles List of taskRole, one task role at least
*/
*/
constructor
(
jobName
:
string
,
image
:
string
,
codeDir
:
string
,
constructor
(
jobName
:
string
,
image
:
string
,
codeDir
:
string
,
taskRoles
:
PAITaskRole
[],
virtualCluster
:
string
)
{
taskRoles
:
PAITaskRole
[],
virtualCluster
:
string
,
authFile
?:
string
)
{
this
.
jobName
=
jobName
;
this
.
jobName
=
jobName
;
this
.
image
=
image
;
this
.
image
=
image
;
this
.
codeDir
=
codeDir
;
this
.
codeDir
=
codeDir
;
this
.
taskRoles
=
taskRoles
;
this
.
taskRoles
=
taskRoles
;
this
.
virtualCluster
=
virtualCluster
;
this
.
virtualCluster
=
virtualCluster
;
this
.
authFile
=
authFile
;
}
}
}
}
...
@@ -129,14 +132,17 @@ export class NNIPAITrialConfig extends TrialConfig {
...
@@ -129,14 +132,17 @@ export class NNIPAITrialConfig extends TrialConfig {
public
virtualCluster
?:
string
;
public
virtualCluster
?:
string
;
//Shared memory for one task in the task role
//Shared memory for one task in the task role
public
shmMB
?:
number
;
public
shmMB
?:
number
;
//authentication file used for private Docker registry
public
authFile
?:
string
;
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
constructor
(
command
:
string
,
codeDir
:
string
,
gpuNum
:
number
,
cpuNum
:
number
,
memoryMB
:
number
,
image
:
string
,
virtualCluster
?:
string
,
shmMB
?:
number
)
{
image
:
string
,
virtualCluster
?:
string
,
shmMB
?:
number
,
authFile
?:
string
)
{
super
(
command
,
codeDir
,
gpuNum
);
super
(
command
,
codeDir
,
gpuNum
);
this
.
cpuNum
=
cpuNum
;
this
.
cpuNum
=
cpuNum
;
this
.
memoryMB
=
memoryMB
;
this
.
memoryMB
=
memoryMB
;
this
.
image
=
image
;
this
.
image
=
image
;
this
.
virtualCluster
=
virtualCluster
;
this
.
virtualCluster
=
virtualCluster
;
this
.
shmMB
=
shmMB
;
this
.
shmMB
=
shmMB
;
this
.
authFile
=
authFile
;
}
}
}
}
src/nni_manager/training_service/pai/paiTrainingService.ts
View file @
88ef6c04
...
@@ -442,7 +442,7 @@ class PAITrainingService implements TrainingService {
...
@@ -442,7 +442,7 @@ class PAITrainingService implements TrainingService {
// Task command
// Task command
nniPaiTrialCommand
,
nniPaiTrialCommand
,
// Task shared memory
// Task shared memory
this
.
paiTrialConfig
.
shmMB
this
.
paiTrialConfig
.
shmMB
,
)
)
];
];
...
@@ -456,7 +456,9 @@ class PAITrainingService implements TrainingService {
...
@@ -456,7 +456,9 @@ class PAITrainingService implements TrainingService {
// PAI Task roles
// PAI Task roles
paiTaskRoles
,
paiTaskRoles
,
// Add Virtual Cluster
// Add Virtual Cluster
this
.
paiTrialConfig
.
virtualCluster
===
undefined
?
'
default
'
:
this
.
paiTrialConfig
.
virtualCluster
.
toString
()
this
.
paiTrialConfig
.
virtualCluster
===
undefined
?
'
default
'
:
this
.
paiTrialConfig
.
virtualCluster
.
toString
(),
//Task auth File
this
.
paiTrialConfig
.
authFile
);
);
// Step 2. Upload code files in codeDir onto HDFS
// Step 2. Upload code files in codeDir onto HDFS
...
...
src/sdk/pycli/nnicli/__init__.py
0 → 100644
View file @
88ef6c04
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
from
.nni_client
import
*
src/sdk/pycli/nnicli/nni_client.py
0 → 100644
View file @
88ef6c04
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
# associated documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish, distribute,
# sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT
# OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# ==================================================================================================
""" A python wrapper for nni rest api
Example:
import nnicli as nc
nc.start_nni('../../../../examples/trials/mnist/config.yml')
nc.set_endpoint('http://localhost:8080')
print(nc.version())
print(nc.get_experiment_status())
print(nc.get_job_statistics())
print(nc.list_trial_jobs())
nc.stop_nni()
"""
import
sys
import
os
import
subprocess
import
requests
# Names exported by ``from .nni_client import *`` (used by the package __init__).
__all__ = [
    'start_nni',
    'stop_nni',
    'set_endpoint',
    'version',
    'get_experiment_status',
    'get_experiment_profile',
    'get_trial_job',
    'list_trial_jobs',
    'get_job_statistics',
    'get_job_metrics',
    'export_data'
]

# REST resource names; _nni_rest_get appends one of these to
# "<endpoint>/<API_ROOT_PATH>/" to build the request URI.
EXPERIMENT_PATH = 'experiment'
VERSION_PATH = 'version'
STATUS_PATH = 'check-status'
JOB_STATISTICS_PATH = 'job-statistics'
TRIAL_JOBS_PATH = 'trial-jobs'
METRICS_PATH = 'metric-data'
EXPORT_DATA_PATH = 'export-data'

# Common prefix of every REST resource, placed between the endpoint and the resource name.
API_ROOT_PATH = 'api/v1/nni'

# Base URL of the REST server; stays None until set_endpoint() is called.
_api_endpoint = None
def set_endpoint(endpoint):
    """Record the base URL of the nni rest server that nnicli should query.

    Parameters
    ----------
    endpoint : str
        Base URL of the rest server, for example: http://localhost:8080
    """
    global _api_endpoint
    _api_endpoint = endpoint
def _check_endpoint():
    """Raise AssertionError when no endpoint has been configured yet."""
    if _api_endpoint is not None:
        return
    raise AssertionError("Please call set_endpoint to specify nni endpoint")
def _nni_rest_get(api_path, response_type='json'):
    """Send a GET request to the nni rest server and decode the response.

    Parameters
    ----------
    api_path : str
        Resource path appended to "<endpoint>/<API_ROOT_PATH>/".
    response_type : str
        'json' to return the decoded JSON body, 'text' to return the raw text.

    Returns
    -------
    The decoded body for a 2xx response, or None for any other status code.

    Raises
    ------
    AssertionError
        If no endpoint has been set, or if the request succeeded but
        response_type is neither 'json' nor 'text'.
    """
    _check_endpoint()
    uri = '{}/{}/{}'.format(_api_endpoint, API_ROOT_PATH, api_path)
    response = requests.get(uri)

    # A non-2xx status is reported to the caller as None rather than as an exception.
    if not _http_succeed(response.status_code):
        return None

    if response_type == 'json':
        return response.json()
    if response_type == 'text':
        return response.text
    raise AssertionError('Incorrect response_type')
def
_http_succeed
(
status_code
):
return
status_code
//
100
==
2
def
_create_process
(
cmd
):
if
sys
.
platform
==
'win32'
:
process
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
PIPE
,
creationflags
=
subprocess
.
CREATE_NEW_PROCESS_GROUP
)
else
:
process
=
subprocess
.
Popen
(
cmd
,
stdout
=
subprocess
.
PIPE
)
while
process
.
poll
()
is
None
:
output
=
process
.
stdout
.
readline
()
if
output
:
print
(
output
.
decode
(
'utf-8'
).
strip
())
return
process
.
returncode
def start_nni(config_file):
    """Start an nni experiment with the specified configuration file.

    Runs ``nnictl create --config <config_file>`` and echoes its output.

    Parameters
    ----------
    config_file : str
        Path of the experiment configuration file.

    Raises
    ------
    RuntimeError
        If nnictl exits with a non-zero return code.
    """
    # Build the argument list directly instead of splitting a formatted
    # string on spaces, so a path containing spaces stays one argument.
    cmd = ['nnictl', 'create', '--config', str(config_file)]
    if _create_process(cmd) != 0:
        raise RuntimeError('Failed to start nni.')
def stop_nni():
    """Stop the nni experiment by running ``nnictl stop``.

    Raises
    ------
    RuntimeError
        If nnictl exits with a non-zero return code.
    """
    return_code = _create_process(['nnictl', 'stop'])
    if return_code != 0:
        raise RuntimeError('Failed to stop nni.')
def version():
    """Return the nni version as text, or None if the request did not succeed."""
    return _nni_rest_get(VERSION_PATH, response_type='text')
def get_experiment_status():
    """Return the decoded JSON experiment status, or None if the request did not succeed."""
    status = _nni_rest_get(STATUS_PATH)
    return status
def get_experiment_profile():
    """Return the decoded JSON experiment profile, or None if the request did not succeed."""
    profile = _nni_rest_get(EXPERIMENT_PATH)
    return profile
def get_trial_job(trial_job_id):
    """Return the decoded JSON information of one trial job.

    Parameters
    ----------
    trial_job_id : str
        Id of the trial job to query; must not be None.

    Returns
    -------
    The decoded JSON response, or None if the request did not succeed.

    Raises
    ------
    AssertionError
        If trial_job_id is None, or if no endpoint has been set.
    """
    # Raise explicitly instead of using an assert statement, which is
    # stripped when Python runs with -O.
    if trial_job_id is None:
        raise AssertionError('trial_job_id must not be None')
    # This is a URL path, so join with '/' rather than os.path.join, which
    # would insert a backslash on Windows.
    return _nni_rest_get('{}/{}'.format(TRIAL_JOBS_PATH, trial_job_id))
def list_trial_jobs():
    """Return the decoded JSON information of all trial jobs, or None if the request did not succeed."""
    trial_jobs = _nni_rest_get(TRIAL_JOBS_PATH)
    return trial_jobs
def get_job_statistics():
    """Return the decoded JSON trial job statistics, or None if the request did not succeed."""
    statistics = _nni_rest_get(JOB_STATISTICS_PATH)
    return statistics
def get_job_metrics(trial_job_id=None):
    """Return trial job metrics.

    Parameters
    ----------
    trial_job_id : str, optional
        Id of the trial job whose metrics are requested. When None, the
        metrics resource is queried without a job id.

    Returns
    -------
    The decoded JSON response, or None if the request did not succeed.
    """
    if trial_job_id is None:
        api_path = METRICS_PATH
    else:
        # This is a URL path, so join with '/' rather than os.path.join,
        # which would insert a backslash on Windows.
        api_path = '{}/{}'.format(METRICS_PATH, trial_job_id)
    return _nni_rest_get(api_path)
def export_data():
    """Return the decoded JSON export of all trial jobs, or None if the request did not succeed."""
    exported = _nni_rest_get(EXPORT_DATA_PATH)
    return exported
src/sdk/pycli/setup.py
0 → 100644
View file @
88ef6c04
import
setuptools
# Packaging metadata for the nnicli distribution; every package found by
# find_packages() is included, and 'requests' is the only declared dependency.
setuptools.setup(
    name='nnicli',
    version='999.0.0-developing',
    packages=setuptools.find_packages(),

    python_requires='>=3.5',
    install_requires=[
        'requests'
    ],

    author='Microsoft NNI Team',
    author_email='nni@microsoft.com',
    description='nnicli for Neural Network Intelligence project',
    license='MIT',
    url='https://github.com/Microsoft/nni',
)
src/sdk/pynni/nni/hyperopt_tuner/hyperopt_tuner.py
View file @
88ef6c04
...
@@ -190,13 +190,19 @@ class HyperoptTuner(Tuner):
...
@@ -190,13 +190,19 @@ class HyperoptTuner(Tuner):
HyperoptTuner is a tuner which using hyperopt algorithm.
HyperoptTuner is a tuner which using hyperopt algorithm.
"""
"""
def
__init__
(
self
,
algorithm_name
,
optimize_mode
=
'minimize'
):
def
__init__
(
self
,
algorithm_name
,
optimize_mode
=
'minimize'
,
parallel_optimize
=
False
,
constant_liar_type
=
'min'
):
"""
"""
Parameters
Parameters
----------
----------
algorithm_name : str
algorithm_name : str
algorithm_name includes "tpe", "random_search" and "anneal".
algorithm_name includes "tpe", "random_search" and "anneal".
optimize_mode : str
optimize_mode : str
parallel_optimize : bool
More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
constant_liar_type : str
constant_liar_type including "min", "max" and "mean"
More detail could reference: docs/en_US/Tuner/HyperoptTuner.md
"""
"""
self
.
algorithm_name
=
algorithm_name
self
.
algorithm_name
=
algorithm_name
self
.
optimize_mode
=
OptimizeMode
(
optimize_mode
)
self
.
optimize_mode
=
OptimizeMode
(
optimize_mode
)
...
@@ -205,6 +211,13 @@ class HyperoptTuner(Tuner):
...
@@ -205,6 +211,13 @@ class HyperoptTuner(Tuner):
self
.
rval
=
None
self
.
rval
=
None
self
.
supplement_data_num
=
0
self
.
supplement_data_num
=
0
self
.
parallel
=
parallel_optimize
if
self
.
parallel
:
self
.
CL_rval
=
None
self
.
constant_liar_type
=
constant_liar_type
self
.
running_data
=
[]
self
.
optimal_y
=
None
def
_choose_tuner
(
self
,
algorithm_name
):
def
_choose_tuner
(
self
,
algorithm_name
):
"""
"""
Parameters
Parameters
...
@@ -266,6 +279,10 @@ class HyperoptTuner(Tuner):
...
@@ -266,6 +279,10 @@ class HyperoptTuner(Tuner):
# but it can rarely cause duplicate parameters
# but it can rarely cause duplicate parameters
total_params
=
self
.
get_suggestion
(
random_search
=
True
)
total_params
=
self
.
get_suggestion
(
random_search
=
True
)
self
.
total_data
[
parameter_id
]
=
total_params
self
.
total_data
[
parameter_id
]
=
total_params
if
self
.
parallel
:
self
.
running_data
.
append
(
parameter_id
)
params
=
split_index
(
total_params
)
params
=
split_index
(
total_params
)
return
params
return
params
...
@@ -287,10 +304,39 @@ class HyperoptTuner(Tuner):
...
@@ -287,10 +304,39 @@ class HyperoptTuner(Tuner):
raise
RuntimeError
(
'Received parameter_id not in total_data.'
)
raise
RuntimeError
(
'Received parameter_id not in total_data.'
)
params
=
self
.
total_data
[
parameter_id
]
params
=
self
.
total_data
[
parameter_id
]
# code for parallel
if
self
.
parallel
:
constant_liar
=
kwargs
.
get
(
'constant_liar'
,
False
)
if
constant_liar
:
rval
=
self
.
CL_rval
else
:
rval
=
self
.
rval
self
.
running_data
.
remove
(
parameter_id
)
# update the reward of optimal_y
if
self
.
optimal_y
is
None
:
if
self
.
constant_liar_type
==
'mean'
:
self
.
optimal_y
=
[
reward
,
1
]
else
:
self
.
optimal_y
=
reward
else
:
if
self
.
constant_liar_type
==
'mean'
:
_sum
=
self
.
optimal_y
[
0
]
+
reward
_number
=
self
.
optimal_y
[
1
]
+
1
self
.
optimal_y
=
[
_sum
,
_number
]
elif
self
.
constant_liar_type
==
'min'
:
self
.
optimal_y
=
min
(
self
.
optimal_y
,
reward
)
elif
self
.
constant_liar_type
==
'max'
:
self
.
optimal_y
=
max
(
self
.
optimal_y
,
reward
)
logger
.
debug
(
"Update optimal_y with reward, optimal_y = %s"
,
self
.
optimal_y
)
else
:
rval
=
self
.
rval
if
self
.
optimize_mode
is
OptimizeMode
.
Maximize
:
if
self
.
optimize_mode
is
OptimizeMode
.
Maximize
:
reward
=
-
reward
reward
=
-
reward
rval
=
self
.
rval
domain
=
rval
.
domain
domain
=
rval
.
domain
trials
=
rval
.
trials
trials
=
rval
.
trials
...
@@ -375,13 +421,26 @@ class HyperoptTuner(Tuner):
...
@@ -375,13 +421,26 @@ class HyperoptTuner(Tuner):
total_params : dict
total_params : dict
parameter suggestion
parameter suggestion
"""
"""
if
self
.
parallel
and
len
(
self
.
total_data
)
>
20
and
len
(
self
.
running_data
)
and
self
.
optimal_y
is
not
None
:
self
.
CL_rval
=
copy
.
deepcopy
(
self
.
rval
)
if
self
.
constant_liar_type
==
'mean'
:
_constant_liar_y
=
self
.
optimal_y
[
0
]
/
self
.
optimal_y
[
1
]
else
:
_constant_liar_y
=
self
.
optimal_y
for
_parameter_id
in
self
.
running_data
:
self
.
receive_trial_result
(
parameter_id
=
_parameter_id
,
parameters
=
None
,
value
=
_constant_liar_y
,
constant_liar
=
True
)
rval
=
self
.
CL_rval
rval
=
self
.
rval
random_state
=
np
.
random
.
randint
(
2
**
31
-
1
)
else
:
rval
=
self
.
rval
random_state
=
rval
.
rstate
.
randint
(
2
**
31
-
1
)
trials
=
rval
.
trials
trials
=
rval
.
trials
algorithm
=
rval
.
algo
algorithm
=
rval
.
algo
new_ids
=
rval
.
trials
.
new_trial_ids
(
1
)
new_ids
=
rval
.
trials
.
new_trial_ids
(
1
)
rval
.
trials
.
refresh
()
rval
.
trials
.
refresh
()
random_state
=
rval
.
rstate
.
randint
(
2
**
31
-
1
)
if
random_search
:
if
random_search
:
new_trials
=
hp
.
rand
.
suggest
(
new_ids
,
rval
.
domain
,
trials
,
new_trials
=
hp
.
rand
.
suggest
(
new_ids
,
rval
.
domain
,
trials
,
random_state
)
random_state
)
...
...
test/cli_test.py
0 → 100644
View file @
88ef6c04
# Copyright (c) Microsoft Corporation
# All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge,
# to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and
# to permit persons to whom the Software is furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING
# BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import
sys
import
time
import
traceback
from
utils
import
GREEN
,
RED
,
CLEAR
,
setup_experiment
def test_nni_cli():
    """Start an experiment through nnicli, call several query functions, then stop it.

    Any exception is reported with a FAIL banner and its traceback, then
    re-raised. The experiment is stopped in every case.
    """
    import nnicli as nc

    config_file = 'config_test/examples/mnist.test.yml'
    pass_message = 'Test nnicli {}: TEST PASS'.format(config_file)
    fail_message = 'Test nnicli {}: TEST FAIL'.format(config_file)

    try:
        # Sleep here to make sure previous stopped exp has enough time to exit to avoid port conflict
        time.sleep(6)
        print(GREEN + 'Testing nnicli:' + config_file + CLEAR)
        nc.start_nni(config_file)
        time.sleep(3)
        nc.set_endpoint('http://localhost:8080')
        print(nc.version())
        print(nc.get_job_statistics())
        print(nc.get_experiment_status())
        nc.list_trial_jobs()

        print(GREEN + pass_message + CLEAR)
    except Exception as error:
        print(RED + fail_message + CLEAR)
        print(repr(error))
        traceback.print_exc()
        raise error
    finally:
        nc.stop_nni()
if __name__ == '__main__':
    # The flag passed to setup_experiment is False only when the last
    # command-line argument is '--preinstall'.
    setup_experiment(sys.argv[-1] != '--preinstall')
    test_nni_cli()
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment