Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
7afe8a71
"vscode:/vscode.git/clone" did not exist on "24fa4619cf01bc4279ff4ba2051bb73f8f049ee3"
Unverified
Commit
7afe8a71
authored
Jun 12, 2022
by
Weidan Kong
Committed by
GitHub
Jun 13, 2022
Browse files
[DLC]: pai-dlc api update & log folder update (#4909)
parent
2815fb1f
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
60 additions
and
41 deletions
+60
-41
docs/source/experiment/training_service/paidlc.rst
docs/source/experiment/training_service/paidlc.rst
+1
-0
examples/trials/mnist-pytorch/config_dlc.yml
examples/trials/mnist-pytorch/config_dlc.yml
+1
-0
nni/experiment/config/training_services/dlc.py
nni/experiment/config/training_services/dlc.py
+1
-0
ts/nni_manager/common/experimentConfig.ts
ts/nni_manager/common/experimentConfig.ts
+1
-0
ts/nni_manager/config/dlc/dlcUtil.py
ts/nni_manager/config/dlc/dlcUtil.py
+49
-40
ts/nni_manager/training_service/reusable/dlc/dlcClient.ts
ts/nni_manager/training_service/reusable/dlc/dlcClient.ts
+4
-0
ts/nni_manager/training_service/reusable/environments/dlcEnvironmentService.ts
...ng_service/reusable/environments/dlcEnvironmentService.ts
+3
-1
No files found.
docs/source/experiment/training_service/paidlc.rst
View file @
7afe8a71
...
...
@@ -51,6 +51,7 @@ Use ``examples/trials/mnist-pytorch`` as an example. The NNI config YAML file's
podCount: 1
ecsSpec: ecs.c6.large
region: cn-hangzhou
workspaceId: ${your_workspace_id}
accessKeyId: ${your_ak_id}
accessKeySecret: ${your_ak_key}
nasDataSourceId: ${your_nas_data_source_id} # NAS datasource ID, e.g., datat56by9n1xt0a
...
...
examples/trials/mnist-pytorch/config_dlc.yml
View file @
7afe8a71
...
...
@@ -17,6 +17,7 @@ trainingService:
podCount
:
1
ecsSpec
:
ecs.c6.large
region
:
cn-hangzhou
workspaceId
:
${your_workspace_id}
accessKeyId
:
${your_ak_id}
accessKeySecret
:
${your_ak_key}
nasDataSourceId
:
${your_nas_data_source_id}
# NAS datasource ID,e.g., datat56by9n1xt0a
...
...
nni/experiment/config/training_services/dlc.py
View file @
7afe8a71
...
...
@@ -17,6 +17,7 @@ class DlcConfig(TrainingServiceConfig):
pod_count
:
int
ecs_spec
:
str
# e.g.,'ecs.c6.large'
region
:
str
workspace_id
:
str
nas_data_source_id
:
str
oss_data_source_id
:
Optional
[
str
]
=
None
access_key_id
:
str
...
...
ts/nni_manager/common/experimentConfig.ts
View file @
7afe8a71
...
...
@@ -92,6 +92,7 @@ export interface DlcConfig extends TrainingServiceConfig {
podCount
:
number
;
ecsSpec
:
string
;
region
:
string
;
workspaceId
:
string
;
nasDataSourceId
:
string
;
ossDataSourceId
?:
string
;
accessKeyId
:
string
;
...
...
ts/nni_manager/config/dlc/dlcUtil.py
View file @
7afe8a71
...
...
@@ -2,7 +2,9 @@
# Licensed under the MIT license.
import
logging
import
os
import
pathlib
import
sys
import
traceback
from
argparse
import
ArgumentParser
...
...
@@ -19,6 +21,7 @@ if __name__ == "__main__":
parser
.
add_argument
(
'--pod_count'
,
type
=
int
,
default
=
1
,
help
=
'pod count'
)
parser
.
add_argument
(
'--ecs_spec'
,
help
=
'ecs spec'
)
parser
.
add_argument
(
'--region'
,
help
=
'region'
)
parser
.
add_argument
(
'--workspace_id'
,
help
=
'workspace id for your project'
)
parser
.
add_argument
(
'--nas_data_source_id'
,
help
=
'nas data_source_id of DLC dataset configuration'
)
parser
.
add_argument
(
'--oss_data_source_id'
,
help
=
'oss data_source_id of DLC dataset configuration'
)
parser
.
add_argument
(
'--access_key_id'
,
help
=
'access_key_id'
)
...
...
@@ -28,6 +31,14 @@ if __name__ == "__main__":
parser
.
add_argument
(
'--log_dir'
,
help
=
'exception log dir'
)
args
=
parser
.
parse_args
()
pathlib
.
Path
(
args
.
log_dir
).
mkdir
(
parents
=
True
,
exist_ok
=
True
)
logging
.
basicConfig
(
filename
=
os
.
path
.
join
(
args
.
log_dir
,
'dlc_exception.log'
),
format
=
'%(asctime)s %(message)s'
,
level
=
logging
.
INFO
)
# DLC submit
try
:
# init client
client
=
Client
(
Config
(
...
...
@@ -66,11 +77,10 @@ if __name__ == "__main__":
job_type
=
args
.
job_type
,
job_specs
=
[
spec
],
data_sources
=
data_sources
,
user_command
=
args
.
user_command
user_command
=
args
.
user_command
,
workspace_id
=
args
.
workspace_id
,
)
# DLC submit
try
:
response
=
client
.
create_job
(
req
)
job_id
=
response
.
body
.
job_id
print
(
'job id: '
+
job_id
)
...
...
@@ -86,6 +96,5 @@ if __name__ == "__main__":
client
.
stop_job
(
job_id
)
exit
(
0
)
except
Exception
as
e
:
with
open
(
os
.
path
.
join
(
args
.
log_dir
,
'dlc_exception.log'
),
'w'
)
as
f
:
f
.
write
(
'DLC submit Exception:
\n
'
)
traceback
.
print_exc
(
file
=
f
)
logging
.
error
(
'DLC submit Exception:
\n
'
)
logging
.
error
(
e
,
exc_info
=
1
)
ts/nni_manager/training_service/reusable/dlc/dlcClient.ts
View file @
7afe8a71
...
...
@@ -13,6 +13,7 @@ export class DlcClient {
public
podCount
:
number
;
public
ecsSpec
:
string
;
public
region
:
string
;
public
workspaceId
:
string
;
// e.g., data1e6vg1tu0zi7, to generate it, go to 'Dataset Config' page of DLC
// create a NAS data and copy the 'DataSet ConfigurationID'
public
nasDataSourceId
:
string
;
...
...
@@ -35,6 +36,7 @@ export class DlcClient {
environmentId
:
string
,
ecsSpec
:
string
,
region
:
string
,
workspaceId
:
string
,
nasDataSourceId
:
string
,
accessKeyId
:
string
,
accessKeySecret
:
string
,
...
...
@@ -50,6 +52,7 @@ export class DlcClient {
this
.
ecsSpec
=
ecsSpec
;
this
.
image
=
image
;
this
.
region
=
region
;
this
.
workspaceId
=
workspaceId
;
this
.
nasDataSourceId
=
nasDataSourceId
;
if
(
ossDataSourceId
!==
undefined
)
{
this
.
ossDataSourceId
=
ossDataSourceId
;
...
...
@@ -77,6 +80,7 @@ export class DlcClient {
'
--pod_count
'
,
String
(
this
.
podCount
),
'
--ecs_spec
'
,
this
.
ecsSpec
,
'
--region
'
,
this
.
region
,
'
--workspace_id
'
,
this
.
workspaceId
,
'
--nas_data_source_id
'
,
this
.
nasDataSourceId
,
'
--oss_data_source_id
'
,
this
.
ossDataSourceId
,
'
--access_key_id
'
,
this
.
accessKeyId
,
...
...
ts/nni_manager/training_service/reusable/environments/dlcEnvironmentService.ts
View file @
7afe8a71
...
...
@@ -15,6 +15,7 @@ import { FileCommandChannel } from '../channels/fileCommandChannel';
import
{
MountedStorageService
}
from
'
../storages/mountedStorageService
'
;
import
{
Scope
}
from
'
typescript-ioc
'
;
import
{
StorageService
}
from
'
../storageService
'
;
import
{
getLogDir
}
from
'
common/utils
'
;
/**
* Collector DLC jobs info from DLC cluster, and update dlc job status locally
...
...
@@ -112,11 +113,12 @@ export class DlcEnvironmentService extends EnvironmentService {
environment
.
id
,
this
.
config
.
ecsSpec
,
this
.
config
.
region
,
this
.
config
.
workspaceId
,
this
.
config
.
nasDataSourceId
,
this
.
config
.
accessKeyId
,
this
.
config
.
accessKeySecret
,
environment
.
command
,
dlcEnvironment
.
workingFolder
,
path
.
join
(
getLogDir
(),
`envs/
${
environment
.
id
}
`
)
,
this
.
config
.
ossDataSourceId
,
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment