Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
055885d9
Unverified
Commit
055885d9
authored
Nov 25, 2020
by
SparkSnail
Committed by
GitHub
Nov 25, 2020
Browse files
Merge dev-adl2 into Master (#3117)
parent
2c5d89a7
Changes
42
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
145 additions
and
1 deletion
+145
-1
ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
.../training_service/kubernetes/kubernetesTrainingService.ts
+7
-1
ts/nni_manager/training_service/test/adlTrainingService.test.ts
..._manager/training_service/test/adlTrainingService.test.ts
+138
-0
No files found.
ts/nni_manager/training_service/kubernetes/kubernetesTrainingService.ts
View file @
055885d9
...
...
@@ -209,6 +209,13 @@ abstract class KubernetesTrainingService {
return
Promise
.
reject
(
error
);
}
try
{
await
this
.
genericK8sClient
.
deleteDeployment
(
"
adaptdl-tensorboard-
"
+
getExperimentId
().
toLowerCase
())
this
.
log
.
info
(
'
tensorboard deployment deleted
'
)
}
catch
(
error
)
{
this
.
log
.
error
(
`tensorboard deployment deletion failed:
${
error
.
message
}
`
)
}
return
Promise
.
resolve
();
}
...
...
@@ -377,6 +384,5 @@ abstract class KubernetesTrainingService {
}
return
Promise
.
resolve
(
folderUriInAzure
);
}
}
export
{
KubernetesTrainingService
};
ts/nni_manager/training_service/test/adlTrainingService.test.ts
0 → 100644
View file @
055885d9
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
*
as
chai
from
'
chai
'
;
import
*
as
chaiAsPromised
from
'
chai-as-promised
'
;
import
*
as
fs
from
'
fs
'
;
import
*
as
tmp
from
'
tmp
'
;
import
*
as
component
from
'
../../common/component
'
;
import
{
TrialJobApplicationForm
,
TrialJobDetail
,
TrainingService
}
from
'
../../common/trainingService
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
}
from
'
../../common/utils
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
import
{
AdlTrainingService
}
from
'
../kubernetes/adl/adlTrainingService
'
;
// Scratch directory created once when this module loads; its path is logged by the suite below.
const localCodeDir: string = tmp.dirSync().name;

/**
 * Test suite for AdlTrainingService.
 *
 * The suite only exercises the service when a kube config can be read from
 * '/home/vsts/.kube/config'. If that read throws, every hook and test returns
 * early, so the suite passes without touching the service.
 */
describe('Unit Test for AdlTrainingService', () => {
    // Decide whether to run: the file content is not needed, only whether the read succeeds.
    let skip: boolean = false;
    try {
        fs.readFileSync('/home/vsts/.kube/config', 'utf8');
    } catch (err) {
        console.log('Please have kubernetes cluster to enable its training service unit test.');
        skip = true;
    }

    // Trial config with explicit resource settings (memorySize / cpuNum) and no checkpoint section.
    const testAdlTrialConfig: string = JSON.stringify({
        "command": "python3 /root/apps/nni_linear_regression/main.py",
        "codeDir": ".",
        "gpuNum": 0,
        "image": "test.image:latest",
        "imagePullSecrets": [
            {
                "name": "stagingsecrets"
            }
        ],
        "nfs": {
            "server": "172.20.188.236",
            "path": "/exports",
            "containerMountPath": "/nfs"
        },
        "memorySize": "1Gi",
        "cpuNum": 1
    });

    // Trial config with the adaptive flag and a checkpoint section, without resource settings.
    const testAdlTrialConfig2: string = JSON.stringify({
        "command": "python3 /root/apps/nni_linear_regression/main.py",
        "codeDir": ".",
        "gpuNum": 0,
        "image": "test.image:latest",
        "imagePullSecrets": [
            {
                "name": "stagingsecrets"
            }
        ],
        "adaptive": true,
        "checkpoint": {
            "storageClass": "aws-efs",
            "storageSize": "1Gi"
        },
        "nfs": {
            "server": "172.20.188.236",
            "path": "/exports",
            "containerMountPath": "/nfs"
        }
    });

    const testNniManagerIp: string = JSON.stringify({
        "nniManagerIp": "0.0.0.0"
    });

    let adlTrainingService: AdlTrainingService;

    // Log the scratch directory that already exists instead of creating another one just to print it.
    console.log(localCodeDir);

    before(() => {
        chai.should();
        chai.use(chaiAsPromised);
        prepareUnitTest();
    });

    after(() => {
        cleanupUnitTest();
    });

    beforeEach(() => {
        if (skip) {
            return;
        }
        adlTrainingService = component.get(AdlTrainingService);
        // Not awaited, as in the original test.
        // NOTE(review): presumably run() is a long-running loop that only settles on cleanup - confirm.
        adlTrainingService.run();
    });

    afterEach(async () => {
        if (skip) {
            return;
        }
        // Awaited so Mocha does not move on before cleanup finishes and so that a
        // cleanup failure is reported against this hook.
        // NOTE(review): assumes cleanUp() returns a promise - confirm against the service.
        await adlTrainingService.cleanUp();
    });

    it('Set and get cluster metadata', async () => {
        if (skip) {
            return;
        }
        await adlTrainingService.setClusterMetadata(TrialConfigMetadataKey.TRIAL_CONFIG, testAdlTrialConfig2);
        await adlTrainingService.setClusterMetadata(TrialConfigMetadataKey.NNI_MANAGER_IP, testNniManagerIp);
        const data: string = await adlTrainingService.getClusterMetadata(TrialConfigMetadataKey.TRIAL_CONFIG);
        chai.expect(data).to.be.equals(testAdlTrialConfig2);
    });

    it('Submit job', async () => {
        if (skip) {
            return;
        }

        // Applies the given trial config, submits one trial built from a fresh form,
        // checks it is WAITING, cancels it, and checks it is reported as USER_CANCELED.
        const submitThenCancel = async (trialConfig: string): Promise<void> => {
            await adlTrainingService.setClusterMetadata(TrialConfigMetadataKey.TRIAL_CONFIG, trialConfig);
            const form: TrialJobApplicationForm = {
                sequenceId: 0,
                hyperParameters: { value: 'mock hyperparameters', index: 0 }
            };
            const jobDetail: TrialJobDetail = await adlTrainingService.submitTrialJob(form);
            chai.expect(jobDetail.status).to.be.equals('WAITING');
            await adlTrainingService.cancelTrialJob(jobDetail.id);
            chai.expect(jobDetail.status).to.be.equals('USER_CANCELED');
        };

        // job without given checkpoint, with resource config
        await submitThenCancel(testAdlTrialConfig);
        // job with given checkpoint
        await submitThenCancel(testAdlTrialConfig2);
    }).timeout(3000000);
});
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment