Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
a5efb4e6
"include/vscode:/vscode.git/clone" did not exist on "9efd033bee1301f13e6645752ecb01c26fa76903"
Unverified
Commit
a5efb4e6
authored
Apr 25, 2021
by
SparkSnail
Committed by
GitHub
Apr 25, 2021
Browse files
Fix v2 config version_check and log_collection (#3575)
parent
e19f5d26
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
35 additions
and
3 deletions
+35
-3
nni/tools/nnictl/launcher.py
nni/tools/nnictl/launcher.py
+29
-0
ts/nni_manager/rest_server/restValidationSchemas.ts
ts/nni_manager/rest_server/restValidationSchemas.ts
+2
-0
ts/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
...frameworkcontroller/frameworkcontrollerTrainingService.ts
+2
-1
ts/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts
...ng_service/kubernetes/kubeflow/kubeflowTrainingService.ts
+2
-1
ts/nni_manager/training_service/kubernetes/kubernetesJobRestServer.ts
...er/training_service/kubernetes/kubernetesJobRestServer.ts
+0
-1
No files found.
nni/tools/nnictl/launcher.py
View file @
a5efb4e6
...
...
@@ -131,12 +131,39 @@ def set_adl_config(experiment_config, port, config_file_name):
with
open
(
stderr_full_path
,
'a+'
)
as
fout
:
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
return
False
,
err_message
set_V1_common_config
(
experiment_config
,
port
,
config_file_name
)
result
,
message
=
setNNIManagerIp
(
experiment_config
,
port
,
config_file_name
)
if
not
result
:
return
result
,
message
#set trial_config
return
set_trial_config
(
experiment_config
,
port
,
config_file_name
),
None
def
validate_response
(
response
,
config_file_name
):
err_message
=
None
if
not
response
or
not
response
.
status_code
==
200
:
if
response
is
not
None
:
err_message
=
response
.
text
_
,
stderr_full_path
=
get_log_path
(
config_file_name
)
with
open
(
stderr_full_path
,
'a+'
)
as
fout
:
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
print_error
(
'Error:'
+
err_message
)
exit
(
1
)
# hack to fix v1 version_check and log_collection bug, need refactor
def
set_V1_common_config
(
experiment_config
,
port
,
config_file_name
):
version_check
=
True
#debug mode should disable version check
if
experiment_config
.
get
(
'debug'
)
is
not
None
:
version_check
=
not
experiment_config
.
get
(
'debug'
)
#validate version check
if
experiment_config
.
get
(
'versionCheck'
)
is
not
None
:
version_check
=
experiment_config
.
get
(
'versionCheck'
)
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
({
'version_check'
:
version_check
}),
REST_TIME_OUT
)
validate_response
(
response
,
config_file_name
)
if
experiment_config
.
get
(
'logCollection'
):
response
=
rest_put
(
cluster_metadata_url
(
port
),
json
.
dumps
({
'log_collection'
:
experiment_config
.
get
(
'logCollection'
)}),
REST_TIME_OUT
)
validate_response
(
response
,
config_file_name
)
def
setNNIManagerIp
(
experiment_config
,
port
,
config_file_name
):
'''set nniManagerIp'''
if
experiment_config
.
get
(
'nniManagerIp'
)
is
None
:
...
...
@@ -167,6 +194,7 @@ def set_kubeflow_config(experiment_config, port, config_file_name):
with
open
(
stderr_full_path
,
'a+'
)
as
fout
:
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
return
False
,
err_message
set_V1_common_config
(
experiment_config
,
port
,
config_file_name
)
result
,
message
=
setNNIManagerIp
(
experiment_config
,
port
,
config_file_name
)
if
not
result
:
return
result
,
message
...
...
@@ -186,6 +214,7 @@ def set_frameworkcontroller_config(experiment_config, port, config_file_name):
with
open
(
stderr_full_path
,
'a+'
)
as
fout
:
fout
.
write
(
json
.
dumps
(
json
.
loads
(
err_message
),
indent
=
4
,
sort_keys
=
True
,
separators
=
(
','
,
':'
)))
return
False
,
err_message
set_V1_common_config
(
experiment_config
,
port
,
config_file_name
)
result
,
message
=
setNNIManagerIp
(
experiment_config
,
port
,
config_file_name
)
if
not
result
:
return
result
,
message
...
...
ts/nni_manager/rest_server/restValidationSchemas.ts
View file @
a5efb4e6
...
...
@@ -197,6 +197,8 @@ export namespace ValidationSchemas {
nni_manager_ip
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
nniManagerIp
:
joi
.
string
().
min
(
1
)
}),
version_check
:
joi
.
boolean
(),
// eslint-disable-line @typescript-eslint/camelcase
log_collection
:
joi
.
string
(),
// eslint-disable-line @typescript-eslint/camelcase
remote_config
:
joi
.
object
({
// eslint-disable-line @typescript-eslint/camelcase
reuse
:
joi
.
boolean
()
}),
...
...
ts/nni_manager/training_service/kubernetes/frameworkcontroller/frameworkcontrollerTrainingService.ts
View file @
a5efb4e6
...
...
@@ -19,6 +19,7 @@ import {validateCodeDir} from '../../common/util';
import
{
NFSConfig
}
from
'
../kubernetesConfig
'
;
import
{
KubernetesTrialJobDetail
}
from
'
../kubernetesData
'
;
import
{
KubernetesTrainingService
}
from
'
../kubernetesTrainingService
'
;
import
{
KubernetesJobRestServer
}
from
'
../kubernetesJobRestServer
'
;
import
{
FrameworkControllerClientFactory
}
from
'
./frameworkcontrollerApiClient
'
;
import
{
FrameworkControllerClusterConfig
,
...
...
@@ -52,7 +53,7 @@ class FrameworkControllerTrainingService extends KubernetesTrainingService imple
}
public
async
run
():
Promise
<
void
>
{
this
.
kubernetesJobRestServer
=
component
.
get
(
FrameworkController
JobRestServer
);
this
.
kubernetesJobRestServer
=
new
Kubernetes
JobRestServer
(
this
);
if
(
this
.
kubernetesJobRestServer
===
undefined
)
{
throw
new
Error
(
'
kubernetesJobRestServer not initialized!
'
);
}
...
...
ts/nni_manager/training_service/kubernetes/kubeflow/kubeflowTrainingService.ts
View file @
a5efb4e6
...
...
@@ -19,6 +19,7 @@ import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
import
{
validateCodeDir
}
from
'
../../common/util
'
;
import
{
NFSConfig
}
from
'
../kubernetesConfig
'
;
import
{
KubernetesTrialJobDetail
}
from
'
../kubernetesData
'
;
import
{
KubernetesJobRestServer
}
from
'
../kubernetesJobRestServer
'
;
import
{
KubernetesTrainingService
}
from
'
../kubernetesTrainingService
'
;
import
{
KubeflowOperatorClientFactory
}
from
'
./kubeflowApiClient
'
;
import
{
KubeflowClusterConfig
,
KubeflowClusterConfigAzure
,
KubeflowClusterConfigFactory
,
KubeflowClusterConfigNFS
,
...
...
@@ -46,7 +47,7 @@ class KubeflowTrainingService extends KubernetesTrainingService implements Kuber
public
async
run
():
Promise
<
void
>
{
this
.
log
.
info
(
'
Run Kubeflow training service.
'
);
this
.
kubernetesJobRestServer
=
component
.
get
(
Kubeflow
JobRestServer
);
this
.
kubernetesJobRestServer
=
new
Kubernetes
JobRestServer
(
this
);
if
(
this
.
kubernetesJobRestServer
===
undefined
)
{
throw
new
Error
(
'
kubernetesJobRestServer not initialized!
'
);
}
...
...
ts/nni_manager/training_service/kubernetes/kubernetesJobRestServer.ts
View file @
a5efb4e6
...
...
@@ -16,7 +16,6 @@ import { KubernetesTrainingService } from './kubernetesTrainingService';
export
class
KubernetesJobRestServer
extends
ClusterJobRestServer
{
@
Inject
private
readonly
kubernetesTrainingService
?
:
KubernetesTrainingService
;
/**
* constructor to provide NNIRestServer's own rest property, e.g. port
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment