Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
ef15fc81
Unverified
Commit
ef15fc81
authored
Jun 21, 2021
by
liuzhe-lz
Committed by
GitHub
Jun 21, 2021
Browse files
Bump node.js version to v16 (#3828)
parent
b2225436
Changes
45
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
100 additions
and
852 deletions
+100
-852
pipelines/fast-test.yml
pipelines/fast-test.yml
+24
-5
setup_ts.py
setup_ts.py
+1
-1
ts/nni_manager/.eslintrc
ts/nni_manager/.eslintrc
+5
-3
ts/nni_manager/common/utils.ts
ts/nni_manager/common/utils.ts
+1
-1
ts/nni_manager/core/nniTensorboardManager.ts
ts/nni_manager/core/nniTensorboardManager.ts
+1
-1
ts/nni_manager/core/nnimanager.ts
ts/nni_manager/core/nnimanager.ts
+1
-1
ts/nni_manager/core/test/ipcInterface.test.ts
ts/nni_manager/core/test/ipcInterface.test.ts
+1
-1
ts/nni_manager/package.json
ts/nni_manager/package.json
+45
-53
ts/nni_manager/rest_server/restHandler.ts
ts/nni_manager/rest_server/restHandler.ts
+7
-4
ts/nni_manager/rest_server/restValidationSchemas.ts
ts/nni_manager/rest_server/restValidationSchemas.ts
+2
-2
ts/nni_manager/training_service/dlts/dltsClusterConfig.ts
ts/nni_manager/training_service/dlts/dltsClusterConfig.ts
+0
-14
ts/nni_manager/training_service/dlts/dltsData.ts
ts/nni_manager/training_service/dlts/dltsData.ts
+0
-8
ts/nni_manager/training_service/dlts/dltsJobConfig.ts
ts/nni_manager/training_service/dlts/dltsJobConfig.ts
+0
-45
ts/nni_manager/training_service/dlts/dltsJobRestServer.ts
ts/nni_manager/training_service/dlts/dltsJobRestServer.ts
+0
-77
ts/nni_manager/training_service/dlts/dltsTrainingService.ts
ts/nni_manager/training_service/dlts/dltsTrainingService.ts
+0
-578
ts/nni_manager/training_service/dlts/dltsTrialConfig.ts
ts/nni_manager/training_service/dlts/dltsTrialConfig.ts
+0
-15
ts/nni_manager/training_service/dlts/dltsTrialJobDetail.ts
ts/nni_manager/training_service/dlts/dltsTrialJobDetail.ts
+0
-31
ts/nni_manager/training_service/remote_machine/shellExecutor.ts
..._manager/training_service/remote_machine/shellExecutor.ts
+3
-3
ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts
...ervice/reusable/environments/openPaiEnvironmentService.ts
+1
-1
ts/nni_manager/training_service/reusable/storageService.ts
ts/nni_manager/training_service/reusable/storageService.ts
+8
-8
No files found.
pipelines/fast-test.yml
View file @
ef15fc81
...
...
@@ -87,6 +87,10 @@ stages:
variables
:
YARN_CACHE_FOLDER
:
$(Pipeline.Workspace)/.yarn
steps
:
-
task
:
NodeTool@0
inputs
:
versionSpec
:
16.3.0
displayName
:
Configure Node.js version
-
task
:
Cache@2
inputs
:
key
:
'
yarn
|
"$(Agent.OS)"
|
ts/**/yarn.lock,
!**/node_modules/**'
...
...
@@ -123,6 +127,11 @@ stages:
versionSpec
:
3.8
displayName
:
Configure Python version
-
task
:
NodeTool@0
inputs
:
versionSpec
:
16.3.0
displayName
:
Configure Node.js version
-
script
:
|
sudo apt-get install -y pandoc
sudo apt-get remove swig -y
...
...
@@ -201,13 +210,17 @@ stages:
PIP_CACHE_DIR
:
$(Pipeline.Workspace)/.pip
YARN_CACHE_FOLDER
:
$(Pipeline.Workspace)/.yarn
# This platform runs integration test first.
steps
:
-
task
:
UsePythonVersion@0
inputs
:
versionSpec
:
3.6
displayName
:
Configure Python version
-
task
:
NodeTool@0
inputs
:
versionSpec
:
16.3.0
displayName
:
Configure Node.js version
-
script
:
|
sudo apt-get install -y pandoc
sudo apt-get remove swig -y
...
...
@@ -283,14 +296,17 @@ stages:
PIP_CACHE_DIR
:
$(Pipeline.Workspace)/.pip
YARN_CACHE_FOLDER
:
$(Pipeline.Workspace)/.yarn
# This platform runs TypeScript unit test first.
steps
:
-
task
:
UsePythonVersion@0
inputs
:
versionSpec
:
3.8
displayName
:
Configure Python version
-
task
:
NodeTool@0
inputs
:
versionSpec
:
16.3.0
displayName
:
Configure Node.js version
-
script
:
|
brew install swig@3
rm -f /usr/local/bin/swig
...
...
@@ -361,14 +377,17 @@ stages:
PIP_CACHE_DIR
:
$(Pipeline.Workspace)/.pip
YARN_CACHE_FOLDER
:
$(Pipeline.Workspace)/.yarn
# This platform runs Python unit test first.
steps
:
-
task
:
UsePythonVersion@0
inputs
:
versionSpec
:
3.8
displayName
:
Configure Python version
-
task
:
NodeTool@0
inputs
:
versionSpec
:
16.3.0
displayName
:
Configure Node.js version
-
task
:
Cache@2
inputs
:
key
:
'
python
|
"$(Agent.OS)"
|
dependencies/*.txt'
...
...
setup_ts.py
View file @
ef15fc81
...
...
@@ -22,7 +22,7 @@ import tarfile
from
zipfile
import
ZipFile
node_version
=
'v1
0.2
3.0'
node_version
=
'v1
6.
3.0'
yarn_version
=
'v1.22.10'
...
...
ts/nni_manager/.eslintrc
View file @
ef15fc81
...
...
@@ -24,14 +24,16 @@
"@typescript-eslint/no-inferrable-types": 0,
"no-inner-declarations": 0,
"@typescript-eslint/explicit-function-return-type": "error",
"@typescript-eslint/no-var-requires": 0,
"@typescript-eslint/no-non-null-assertion": 0,
"@typescript-eslint/no-unused-vars": [
"
error
",
"
off
",
{
"argsIgnorePattern": "^_"
}
],
"@typescript-eslint/no-var-requires": 0,
"@typescript-eslint/no-non-null-assertion": 0
"@typescript-eslint/no-use-before-define": 0
},
"ignorePatterns": [
"node_modules/",
...
...
ts/nni_manager/common/utils.ts
View file @
ef15fc81
...
...
@@ -56,7 +56,7 @@ function mkDirP(dirPath: string): Promise<void> {
}
else
{
const
parent
:
string
=
path
.
dirname
(
dirPath
);
mkDirP
(
parent
).
then
(()
=>
{
fs
.
mkdir
(
dirPath
,
(
err
:
Error
)
=>
{
fs
.
mkdir
(
dirPath
,
(
err
:
Error
|
null
)
=>
{
if
(
err
)
{
deferred
.
reject
(
err
);
}
else
{
...
...
ts/nni_manager/core/nniTensorboardManager.ts
View file @
ef15fc81
...
...
@@ -70,7 +70,7 @@ class NNITensorboardManager implements TensorboardManager {
this
.
log
.
error
(
error
);
const
alive
:
boolean
=
await
isAlive
(
tensorboardProc
.
pid
);
if
(
alive
)
{
process
.
kill
(
-
tensorboardProc
.
pid
);
process
.
kill
(
-
tensorboardProc
.
pid
!
);
}
this
.
setTensorboardTaskStatus
(
tensorboardTask
,
'
ERROR
'
);
});
...
...
ts/nni_manager/core/nnimanager.ts
View file @
ef15fc81
...
...
@@ -490,7 +490,7 @@ class NNIManager implements Manager {
};
const
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
const
tunerProc
:
ChildProcess
=
getTunerProc
(
command
,
stdio
,
newCwd
,
newEnv
);
this
.
dispatcherPid
=
tunerProc
.
pid
;
this
.
dispatcherPid
=
tunerProc
.
pid
!
;
this
.
dispatcher
=
createDispatcherInterface
(
tunerProc
);
return
;
...
...
ts/nni_manager/core/test/ipcInterface.test.ts
View file @
ef15fc81
...
...
@@ -30,7 +30,7 @@ function runProcess(): Promise<Error | null> {
if
(
code
!==
0
)
{
deferred
.
resolve
(
new
Error
(
`return code:
${
code
}
`
));
}
else
{
let
str
=
proc
.
stdout
.
read
().
toString
();
let
str
=
proc
.
stdout
!
.
read
().
toString
();
if
(
str
.
search
(
"
\r\n
"
)
!=-
1
){
sentCommands
=
str
.
split
(
"
\r\n
"
);
}
...
...
ts/nni_manager/package.json
View file @
ef15fc81
...
...
@@ -11,79 +11,71 @@
},
"license"
:
"MIT"
,
"dependencies"
:
{
"azure-storage"
:
"^2.10.
2
"
,
"azure-storage"
:
"^2.10.
4
"
,
"child-process-promise"
:
"^2.2.1"
,
"express"
:
"^4.1
6.3
"
,
"express-joi-validator"
:
"^2.0.
0
"
,
"ignore"
:
"^5.1.
4
"
,
"js-base64"
:
"^
2.4.9
"
,
"kubernetes-client"
:
"^6.
5.0
"
,
"express"
:
"^4.1
7.1
"
,
"express-joi-validator"
:
"^2.0.
1
"
,
"ignore"
:
"^5.1.
8
"
,
"js-base64"
:
"^
3.6.1
"
,
"kubernetes-client"
:
"^6.
12.1
"
,
"lockfile"
:
"^1.0.4"
,
"python-shell"
:
"^
2
.0.
1
"
,
"python-shell"
:
"^
3
.0.
0
"
,
"rx"
:
"^4.1.0"
,
"sqlite3"
:
"5.0.
0
"
,
"ssh2"
:
"^
0.8.9
"
,
"sqlite3"
:
"5.0.
2
"
,
"ssh2"
:
"^
1.1.0
"
,
"stream-buffers"
:
"^3.0.2"
,
"tail-stream"
:
"^0.3.4"
,
"tar"
:
"^6.
0.2
"
,
"tar"
:
"^6.
1.0
"
,
"tree-kill"
:
"^1.2.2"
,
"ts-deferred"
:
"^1.0.4"
,
"typescript-ioc"
:
"^1.2.4"
,
"typescript-string-operations"
:
"^1.3.1"
,
"webhdfs"
:
"^1.2.0"
,
"typescript-ioc"
:
"^1.2.6"
,
"typescript-string-operations"
:
"^1.4.1"
,
"ws"
:
"^7.4.6"
},
"devDependencies"
:
{
"@types/chai"
:
"^4.
1.4
"
,
"@types/chai"
:
"^4.
2.18
"
,
"@types/chai-as-promised"
:
"^7.1.0"
,
"@types/express"
:
"^4.1
6.0
"
,
"@types/glob"
:
"^7.1.
1
"
,
"@types/js-base64"
:
"^
2
.3.1"
,
"@types/js-yaml"
:
"^
3.12.5
"
,
"@types/express"
:
"^4.1
7.2
"
,
"@types/glob"
:
"^7.1.
3
"
,
"@types/js-base64"
:
"^
3
.3.1"
,
"@types/js-yaml"
:
"^
4.0.1
"
,
"@types/lockfile"
:
"^1.0.0"
,
"@types/mocha"
:
"^8.
0.3
"
,
"@types/node"
:
"
10
.12.1
8
"
,
"@types/request"
:
"^2.4
7.1
"
,
"@types/rx"
:
"^4.1.
1
"
,
"@types/sqlite3"
:
"^3.1.
3
"
,
"@types/ssh2"
:
"^0.5.
35
"
,
"@types/stream-buffers"
:
"^3.0.
2
"
,
"@types/tar"
:
"^4.0.
3
"
,
"@types/tmp"
:
"^0.
0.33
"
,
"@types/ws"
:
"^7.
2.5
"
,
"@types/mocha"
:
"^8.
2.2
"
,
"@types/node"
:
"
^15
.12.1"
,
"@types/request"
:
"^2.4
8.5
"
,
"@types/rx"
:
"^4.1.
2
"
,
"@types/sqlite3"
:
"^3.1.
7
"
,
"@types/ssh2"
:
"^0.5.
46
"
,
"@types/stream-buffers"
:
"^3.0.
3
"
,
"@types/tar"
:
"^4.0.
4
"
,
"@types/tmp"
:
"^0.
2.0
"
,
"@types/ws"
:
"^7.
4.4
"
,
"@typescript-eslint/eslint-plugin"
:
"^2.10.0"
,
"@typescript-eslint/parser"
:
"^
2.10
.0"
,
"chai"
:
"^4.
1.2
"
,
"@typescript-eslint/parser"
:
"^
4.26
.0"
,
"chai"
:
"^4.
3.4
"
,
"chai-as-promised"
:
"^7.1.1"
,
"eslint"
:
"^
6.
7.2"
,
"glob"
:
"^7.1.
3
"
,
"mocha"
:
"^8.
1.3
"
,
"eslint"
:
"^7.2
8.0
"
,
"glob"
:
"^7.1.
7
"
,
"mocha"
:
"^8.
4.0
"
,
"npx"
:
"^10.2.2"
,
"nyc"
:
"^15.
0
.0"
,
"request"
:
"^2.8
7.0
"
,
"nyc"
:
"^15.
1
.0"
,
"request"
:
"^2.8
8.2
"
,
"rmdir"
:
"^1.2.0"
,
"tmp"
:
"^0.
0.33
"
,
"ts-node"
:
"^
7
.0.0"
,
"typescript"
:
"^
3.2
.2"
"tmp"
:
"^0.
2.1
"
,
"ts-node"
:
"^
10
.0.0"
,
"typescript"
:
"^
4.3
.2"
},
"resolutions"
:
{
"mem"
:
"^4.0.0"
,
"lodash"
:
">=4.17.13"
,
"lodash.merge"
:
">=4.6.2"
,
"node.extend"
:
"^1.1.7"
,
"hoek"
:
"^4.2.1"
,
"js-yaml"
:
"^3.13.1"
,
"node-forge"
:
">=0.10.0"
,
"dot-prop"
:
"^4.2.1"
,
"npm"
:
">=6.14.8"
,
"yargs"
:
"~16.0.3"
,
"yargs-parser"
:
">=20.2.0"
,
"y18n"
:
">=5.0.5"
,
"acorn"
:
">=8.0.4"
,
"serialize-javascript"
:
">=5.0.1"
"acorn"
:
">=8.3.0"
,
"hoek"
:
">=6.1.3"
,
"node.extend"
:
">=1.1.8"
,
"npm"
:
">=7.16.0"
,
"y18n"
:
">=5.0.8"
,
"yargs-parser"
:
">=20.2.7"
,
"joi"
:
">=17.4.0"
},
"engines"
:
{
"node"
:
"
>=10.0
.0"
"node"
:
"
^16.3
.0"
},
"nyc"
:
{
"include"
:
[
...
...
ts/nni_manager/rest_server/restHandler.ts
View file @
ef15fc81
...
...
@@ -17,6 +17,9 @@ import { TensorboardManager, TensorboardTaskInfo } from '../common/tensorboardMa
import
{
ValidationSchemas
}
from
'
./restValidationSchemas
'
;
import
{
NNIRestServer
}
from
'
./nniRestServer
'
;
import
{
getVersion
}
from
'
../common/utils
'
;
import
{
MetricType
}
from
'
../common/datastore
'
;
import
{
ProfileUpdateType
}
from
'
../common/manager
'
;
import
{
LogType
,
TrialJobStatus
}
from
'
../common/trainingService
'
;
const
expressJoi
=
require
(
'
express-joi-validator
'
);
...
...
@@ -139,7 +142,7 @@ class NNIRestHandler {
private
updateExperimentProfile
(
router
:
Router
):
void
{
router
.
put
(
'
/experiment
'
,
(
req
:
Request
,
res
:
Response
)
=>
{
this
.
nniManager
.
updateExperimentProfile
(
req
.
body
,
req
.
query
.
update_type
).
then
(()
=>
{
this
.
nniManager
.
updateExperimentProfile
(
req
.
body
,
req
.
query
.
update_type
as
ProfileUpdateType
).
then
(()
=>
{
res
.
send
();
}).
catch
((
err
:
Error
)
=>
{
this
.
handleError
(
err
,
res
);
...
...
@@ -219,7 +222,7 @@ class NNIRestHandler {
private
listTrialJobs
(
router
:
Router
):
void
{
router
.
get
(
'
/trial-jobs
'
,
(
req
:
Request
,
res
:
Response
)
=>
{
this
.
nniManager
.
listTrialJobs
(
req
.
query
.
status
).
then
((
jobInfos
:
TrialJobInfo
[])
=>
{
this
.
nniManager
.
listTrialJobs
(
req
.
query
.
status
as
TrialJobStatus
).
then
((
jobInfos
:
TrialJobInfo
[])
=>
{
jobInfos
.
forEach
((
trialJob
:
TrialJobInfo
)
=>
{
this
.
setErrorPathForFailedJob
(
trialJob
);
});
...
...
@@ -263,7 +266,7 @@ class NNIRestHandler {
private
getMetricData
(
router
:
Router
):
void
{
router
.
get
(
'
/metric-data/:job_id*?
'
,
async
(
req
:
Request
,
res
:
Response
)
=>
{
this
.
nniManager
.
getMetricData
(
req
.
params
.
job_id
,
req
.
query
.
type
).
then
((
metricsData
:
MetricDataRecord
[])
=>
{
this
.
nniManager
.
getMetricData
(
req
.
params
.
job_id
,
req
.
query
.
type
as
MetricType
).
then
((
metricsData
:
MetricDataRecord
[])
=>
{
res
.
send
(
metricsData
);
}).
catch
((
err
:
Error
)
=>
{
this
.
handleError
(
err
,
res
);
...
...
@@ -295,7 +298,7 @@ class NNIRestHandler {
private
getTrialLog
(
router
:
Router
):
void
{
router
.
get
(
'
/trial-log/:id/:type
'
,
async
(
req
:
Request
,
res
:
Response
)
=>
{
this
.
nniManager
.
getTrialLog
(
req
.
params
.
id
,
req
.
params
.
type
).
then
((
log
:
string
)
=>
{
this
.
nniManager
.
getTrialLog
(
req
.
params
.
id
,
req
.
params
.
type
as
LogType
).
then
((
log
:
string
)
=>
{
if
(
log
===
''
)
{
log
=
'
No logs available.
'
}
...
...
ts/nni_manager/rest_server/restValidationSchemas.ts
View file @
ef15fc81
...
...
@@ -82,7 +82,7 @@ export namespace ValidationSchemas {
gpuNum
:
joi
.
number
().
min
(
0
).
required
(),
command
:
joi
.
string
().
min
(
1
).
required
()
}),
taskRoles
:
joi
.
array
({
taskRoles
:
joi
.
array
(
).
items
(
{
name
:
joi
.
string
().
min
(
1
),
taskNum
:
joi
.
number
().
min
(
1
).
required
(),
image
:
joi
.
string
().
min
(
1
),
...
...
@@ -98,7 +98,7 @@ export namespace ValidationSchemas {
minSucceededTaskCount
:
joi
.
number
()
})
}),
imagePullSecrets
:
joi
.
array
({
imagePullSecrets
:
joi
.
array
(
).
items
(
{
name
:
joi
.
string
().
min
(
1
).
required
()
}),
// ############## adl ###############
...
...
ts/nni_manager/training_service/dlts/dltsClusterConfig.ts
deleted
100644 → 0
View file @
b2225436
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
/**
 * Settings that identify a DLTS cluster and the account used with it.
 *
 * NOTE(review): the per-field descriptions are inferred from the field names
 * and from how DLTSJobConfig consumes them — confirm against the DLTS
 * training service before relying on them.
 */
export interface DLTSClusterConfig {
    /** Presumably the address of the DLTS dashboard — TODO confirm. */
    dashboard: string;

    /** Presumably the name of the target cluster — TODO confirm. */
    cluster: string;

    /** Team name; DLTSJobConfig copies it into both `team` and `vcName`. */
    team: string;

    /** Account e-mail; DLTSJobConfig copies it into `userName`. */
    email: string;

    /** Presumably the credential paired with `email` — TODO confirm. */
    password: string;

    /**
     * GPU type of the cluster. Optional here, but the DLTSJobConfig
     * constructor throws 'GPU type not fetched' while it is still undefined.
     */
    gpuType?: string;
}
ts/nni_manager/training_service/dlts/dltsData.ts
deleted
100644 → 0
View file @
b2225436
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
/**
 * Single-line shell command template used to start a trial on DLTS.
 *
 * Positional placeholders, as they appear in the template:
 *   {0} NNI_SYS_DIR, {1} NNI_OUTPUT_DIR, {2} NNI_TRIAL_JOB_ID, {3} NNI_EXP_ID,
 *   {4} NNI_TRIAL_SEQ_ID, {5} MULTI_PHASE, {6} directory to `cd` into before
 *   launching trial_keeper, {7} --trial_command, {8} --nnimanager_ip,
 *   {9} --nnimanager_port, {10} --nni_manager_version, {11} --log_collection.
 *
 * The segments are joined with a single space, so the resulting string
 * contains no line breaks.
 * NOTE(review): the placeholders are presumably filled by a String.Format-style
 * helper at the call site — confirm.
 */
export const DLTS_TRIAL_COMMAND_FORMAT: string = [
    'export NNI_PLATFORM=dlts NNI_SYS_DIR={0} NNI_OUTPUT_DIR={1} NNI_TRIAL_JOB_ID={2} NNI_EXP_ID={3} NNI_TRIAL_SEQ_ID={4} MULTI_PHASE={5}',
    '&& cd $NNI_SYS_DIR && sh install_nni.sh',
    "&& cd '{6}' && python3 -m nni.tools.trial_tool.trial_keeper --trial_command '{7}'",
    "--nnimanager_ip '{8}' --nnimanager_port '{9}' --nni_manager_version '{10}' --log_collection '{11}'",
].join(' ');
ts/nni_manager/training_service/dlts/dltsJobConfig.ts
deleted
100644 → 0
View file @
b2225436
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import
{
DLTSClusterConfig
}
from
"
./dltsClusterConfig
"
;
/**
 * Description of a single DLTS job.
 *
 * Combines three sources: values copied from the cluster configuration
 * (team, vcName, gpuType, userName), caller-supplied constructor arguments
 * (jobName, resourcegpu, image, cmd, interactivePorts) and a set of fixed
 * defaults that are the same for every instance.
 *
 * NOTE(review): presumably this object is serialized and submitted to the
 * DLTS service, in which case property names and order matter — confirm
 * before renaming or reordering anything.
 */
export class DLTSJobConfig {
    // Filled in by the constructor from the cluster configuration.
    public readonly team: string;
    public readonly userName: string;
    public readonly vcName: string;
    public readonly gpuType: string;

    // Fixed settings shared by every job.
    public readonly jobType = 'training';
    public readonly jobtrainingtype = 'RegularJob';

    public readonly ssh = false;
    public readonly ipython = false;
    public readonly tensorboard = false;

    public readonly workPath = '';
    public readonly enableworkpath = true;
    public readonly dataPath = '';
    public readonly enabledatapath = false;
    public readonly jobPath = '';
    public readonly enablejobpath = true;

    public readonly mountpoints = [];
    public readonly env = [{ name: 'TMPDIR', value: '$HOME/tmp' }];

    public readonly hostNetwork = false;
    public readonly useGPUTopology = false;
    public readonly isPrivileged = false;
    public readonly hostIPC = false;
    // Deliberately the string 'False', not the boolean false.
    public readonly preemptionAllowed = 'False';

    /**
     * @param clusterConfig    source of team, e-mail and GPU type
     * @param jobName          stored as `jobName`
     * @param resourcegpu      stored as `resourcegpu`
     * @param image            stored as `image`
     * @param cmd              stored as `cmd`
     * @param interactivePorts stored as `interactivePorts`
     * @throws Error 'GPU type not fetched' when `clusterConfig.gpuType` is undefined
     */
    public constructor(
        clusterConfig: DLTSClusterConfig,
        public readonly jobName: string,
        public readonly resourcegpu: number,
        public readonly image: string,
        public readonly cmd: string,
        public readonly interactivePorts: number[],
    ) {
        const clusterGpuType = clusterConfig.gpuType;
        if (clusterGpuType === undefined) {
            throw new Error('GPU type not fetched');
        }

        // `team` is assigned before `vcName`, matching the original chained assignment.
        const clusterTeam = clusterConfig.team;
        this.team = clusterTeam;
        this.vcName = clusterTeam;
        this.gpuType = clusterGpuType;
        this.userName = clusterConfig.email;
    }
}
ts/nni_manager/training_service/dlts/dltsJobRestServer.ts
deleted
100644 → 0
View file @
b2225436
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
{
Request
,
Response
,
Router
}
from
'
express
'
;
import
{
Inject
}
from
'
typescript-ioc
'
;
import
*
as
component
from
'
../../common/component
'
;
import
{
ClusterJobRestServer
}
from
'
../common/clusterJobRestServer
'
;
import
{
DLTSTrainingService
}
from
'
./dltsTrainingService
'
;
/**
 * Element type of the list kept by DLTSJobRestServer for the
 * `/parameter-file-meta` endpoints. All fields are read-only strings.
 *
 * NOTE(review): field meanings are inferred from their names — confirm.
 */
export interface ParameterFileMeta {
    /** Presumably the id of the owning experiment. */
    readonly experimentId: string;

    /** Presumably the id of the trial the file belongs to. */
    readonly trialId: string;

    /** Presumably the location of the parameter file. */
    readonly filePath: string;
}
/**
 * REST server of the DLTS training service.
 *
 * Forwards trial metrics to the DLTS training service and adds the
 * `/parameter-file-meta` endpoints (POST to record an entry, GET to list
 * all recorded entries) on top of the routes created by ClusterJobRestServer.
 */
@component.Singleton
export class DLTSJobRestServer extends ClusterJobRestServer {
    /** Request bodies received by POST /parameter-file-meta, in arrival order. */
    private parameterFileMetaList: ParameterFileMeta[] = [];

    @Inject
    private readonly dltsTrainingService: DLTSTrainingService;

    /**
     * Runs the base-class constructor, then obtains the DLTSTrainingService
     * instance from the component container.
     */
    constructor() {
        super();
        this.dltsTrainingService = component.get(DLTSTrainingService);
    }

    /**
     * Emits one 'metric' event per entry of `metrics` on the training
     * service's MetricsEmitter, each tagged with `jobId`.
     *
     * The array is split on purpose: the original author warns that emitting
     * the metrics unsplit leads to unknown behavior.
     *
     * @param jobId   id attached to every emitted event
     * @param metrics metrics to emit, one event each
     */
    // tslint:disable-next-line:no-any
    protected handleTrialMetrics(jobId: string, metrics: any[]): void {
        for (let index = 0; index < metrics.length; index++) {
            this.dltsTrainingService.MetricsEmitter.emit('metric', {
                id: jobId,
                data: metrics[index]
            });
        }
    }

    /**
     * Builds the router: the base-class routes plus POST and GET handlers
     * for `/parameter-file-meta`. Any exception thrown inside a handler is
     * logged and answered with HTTP 500 and the error message.
     *
     * @returns the router with the additional routes registered
     */
    protected createRestHandler(): Router {
        const restRouter: Router = super.createRestHandler();
        const metaRoute = '/parameter-file-meta';

        // Record the posted body as-is; no validation is performed here.
        restRouter.post(metaRoute, (req: Request, res: Response) => {
            try {
                const payload = req.body;
                this.log.info(`POST /parameter-file-meta, body is ${JSON.stringify(payload)}`);
                this.parameterFileMetaList.push(payload);
                res.send();
            } catch (err) {
                this.log.error(`POST parameter-file-meta error: ${err}`);
                res.status(500).send(err.message);
            }
        });

        // Return everything recorded so far.
        restRouter.get(metaRoute, (req: Request, res: Response) => {
            try {
                this.log.info('GET /parameter-file-meta');
                res.send(this.parameterFileMetaList);
            } catch (err) {
                this.log.error(`GET parameter-file-meta error: ${err}`);
                res.status(500).send(err.message);
            }
        });

        return restRouter;
    }
}
ts/nni_manager/training_service/dlts/dltsTrainingService.ts
deleted
100644 → 0
View file @
b2225436
This diff is collapsed.
Click to expand it.
ts/nni_manager/training_service/dlts/dltsTrialConfig.ts
deleted
100644 → 0
View file @
b2225436
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import
{
TrialConfig
}
from
"
training_service/common/trialConfig
"
;
/**
 * Trial configuration for DLTS: the arguments of the base TrialConfig
 * plus an `image` property.
 */
export class DLTSTrialConfig extends TrialConfig {
    /**
     * @param command forwarded unchanged to the TrialConfig constructor
     * @param codeDir forwarded unchanged to the TrialConfig constructor
     * @param gpuNum  forwarded unchanged to the TrialConfig constructor
     * @param image   kept on the instance as a public read-only property
     *                (presumably the container image for the trial — confirm)
     */
    public constructor(
        command: string,
        codeDir: string,
        gpuNum: number,
        public readonly image: string,
    ) {
        super(command, codeDir, gpuNum);
    }
}
ts/nni_manager/training_service/dlts/dltsTrialJobDetail.ts
deleted
100644 → 0
View file @
b2225436
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
import
{
TrialJobDetail
,
TrialJobStatus
,
TrialJobApplicationForm
}
from
"
../../common/trainingService
"
;
/**
 * TrialJobDetail implementation for DLTS trials. Besides the fields of the
 * TrialJobDetail contract it carries DLTS-specific bookkeeping: the DLTS job
 * name (required), the DLTS job id (set later, if at all) and a paused flag.
 */
export class DLTSTrialJobDetail implements TrialJobDetail {
    // Optional fields, left undefined by the constructor.
    public startTime?: number;
    public endTime?: number;
    public tags?: string[];
    public url?: string;
    public isEarlyStopped?: boolean;

    // DLTS-specific state.
    public dltsJobId?: string;
    /** Starts out as false. */
    public dltsPaused = false;

    /**
     * Every argument is stored on the instance under the same name.
     */
    public constructor(
        public id: string,
        public status: TrialJobStatus,
        public submitTime: number,
        public workingDirectory: string,
        public form: TrialJobApplicationForm,
        // DLTS-specific
        public dltsJobName: string,
    ) {}
}
ts/nni_manager/training_service/remote_machine/shellExecutor.ts
View file @
ef15fc81
...
...
@@ -277,7 +277,7 @@ class ShellExecutor {
this
.
log
.
debug
(
`copyFileToRemote(
${
commandIndex
}
): localFilePath:
${
localFilePath
}
, remoteFilePath:
${
remoteFilePath
}
`
);
const
deferred
:
Deferred
<
boolean
>
=
new
Deferred
<
boolean
>
();
this
.
sshClient
.
sftp
((
err
:
Error
,
sftp
:
SFTPWrapper
)
=>
{
this
.
sshClient
.
sftp
((
err
:
Error
|
undefined
,
sftp
:
SFTPWrapper
)
=>
{
if
(
err
!==
undefined
&&
err
!==
null
)
{
this
.
log
.
error
(
`copyFileToRemote(
${
commandIndex
}
):
${
err
}
`
);
deferred
.
reject
(
err
);
...
...
@@ -328,7 +328,7 @@ class ShellExecutor {
const
commandIndex
=
randomInt
(
10000
);
this
.
log
.
debug
(
`getRemoteFileContent(
${
commandIndex
}
): filePath:
${
filePath
}
`
);
const
deferred
:
Deferred
<
string
>
=
new
Deferred
<
string
>
();
this
.
sshClient
.
sftp
((
err
:
Error
,
sftp
:
SFTPWrapper
)
=>
{
this
.
sshClient
.
sftp
((
err
:
Error
|
undefined
,
sftp
:
SFTPWrapper
)
=>
{
if
(
err
!==
undefined
&&
err
!==
null
)
{
this
.
log
.
error
(
`getRemoteFileContent(
${
commandIndex
}
) sftp:
${
err
}
`
);
deferred
.
reject
(
new
Error
(
`SFTP error:
${
err
}
`
));
...
...
@@ -376,7 +376,7 @@ class ShellExecutor {
// Windows always uses shell, and it needs to disable to get it works.
useShell
=
useShell
&&
!
this
.
isWindows
;
const
callback
=
(
err
:
Error
,
channel
:
ClientChannel
):
void
=>
{
const
callback
=
(
err
:
Error
|
undefined
,
channel
:
ClientChannel
):
void
=>
{
if
(
err
!==
undefined
&&
err
!==
null
)
{
this
.
log
.
error
(
`remoteExeCommand(
${
commandIndex
}
):
${
err
.
message
}
`
);
deferred
.
reject
(
err
);
...
...
ts/nni_manager/training_service/reusable/environments/openPaiEnvironmentService.ts
View file @
ef15fc81
...
...
@@ -310,7 +310,7 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
}
}
}
return
yaml
.
safeD
ump
(
nniJobConfig
);
return
yaml
.
d
ump
(
nniJobConfig
);
}
protected
formatPAIHost
(
host
:
string
):
string
{
...
...
ts/nni_manager/training_service/reusable/storageService.ts
View file @
ef15fc81
...
...
@@ -16,14 +16,14 @@ export abstract class StorageService {
protected
logger
:
Logger
;
protected
abstract
internalConfig
(
key
:
string
,
value
:
string
):
void
;
protected
abstract
async
internalRemove
(
remotePath
:
string
,
isDirectory
:
boolean
,
isRecursive
:
boolean
):
Promise
<
void
>
;
protected
abstract
async
internalRename
(
remotePath
:
string
,
newName
:
string
):
Promise
<
void
>
;
protected
abstract
async
internalMkdir
(
remotePath
:
string
):
Promise
<
void
>
;
protected
abstract
async
internalCopy
(
sourcePath
:
string
,
targetPath
:
string
,
isDirectory
:
boolean
,
isFromRemote
:
boolean
,
isToRemote
:
boolean
):
Promise
<
string
>
;
protected
abstract
async
internalExists
(
remotePath
:
string
):
Promise
<
boolean
>
;
protected
abstract
async
internalRead
(
remotePath
:
string
,
offset
:
number
,
length
:
number
):
Promise
<
string
>
;
protected
abstract
async
internalList
(
remotePath
:
string
):
Promise
<
string
[]
>
;
protected
abstract
async
internalAttach
(
remotePath
:
string
,
content
:
string
):
Promise
<
boolean
>
;
protected
abstract
internalRemove
(
remotePath
:
string
,
isDirectory
:
boolean
,
isRecursive
:
boolean
):
Promise
<
void
>
;
protected
abstract
internalRename
(
remotePath
:
string
,
newName
:
string
):
Promise
<
void
>
;
protected
abstract
internalMkdir
(
remotePath
:
string
):
Promise
<
void
>
;
protected
abstract
internalCopy
(
sourcePath
:
string
,
targetPath
:
string
,
isDirectory
:
boolean
,
isFromRemote
:
boolean
,
isToRemote
:
boolean
):
Promise
<
string
>
;
protected
abstract
internalExists
(
remotePath
:
string
):
Promise
<
boolean
>
;
protected
abstract
internalRead
(
remotePath
:
string
,
offset
:
number
,
length
:
number
):
Promise
<
string
>
;
protected
abstract
internalList
(
remotePath
:
string
):
Promise
<
string
[]
>
;
protected
abstract
internalAttach
(
remotePath
:
string
,
content
:
string
):
Promise
<
boolean
>
;
protected
abstract
internalIsRelativePath
(
path
:
string
):
boolean
;
protected
abstract
internalJoin
(...
paths
:
string
[]):
string
;
protected
abstract
internalDirname
(...
paths
:
string
[]):
string
;
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment