Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
0663218b
Unverified
Commit
0663218b
authored
Apr 22, 2019
by
SparkSnail
Committed by
GitHub
Apr 22, 2019
Browse files
Merge pull request #163 from Microsoft/master
merge master
parents
6c9360a5
cf983800
Changes
116
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
491 additions
and
99 deletions
+491
-99
examples/trials/cifar10_pytorch/utils.py
examples/trials/cifar10_pytorch/utils.py
+1
-1
install.ps1
install.ps1
+127
-0
setup.py
setup.py
+3
-2
src/nni_manager/common/datastore.ts
src/nni_manager/common/datastore.ts
+1
-1
src/nni_manager/common/manager.ts
src/nni_manager/common/manager.ts
+1
-0
src/nni_manager/common/utils.ts
src/nni_manager/common/utils.ts
+102
-6
src/nni_manager/core/commands.ts
src/nni_manager/core/commands.ts
+3
-0
src/nni_manager/core/nnimanager.ts
src/nni_manager/core/nnimanager.ts
+16
-16
src/nni_manager/core/test/dataStore.test.ts
src/nni_manager/core/test/dataStore.test.ts
+1
-0
src/nni_manager/core/test/ipcInterface.test.ts
src/nni_manager/core/test/ipcInterface.test.ts
+10
-5
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
+2
-4
src/nni_manager/core/test/nnimanager.test.ts
src/nni_manager/core/test/nnimanager.test.ts
+2
-1
src/nni_manager/package.json
src/nni_manager/package.json
+1
-2
src/nni_manager/rest_server/restHandler.ts
src/nni_manager/rest_server/restHandler.ts
+11
-0
src/nni_manager/rest_server/test/mockedNNIManager.ts
src/nni_manager/rest_server/test/mockedNNIManager.ts
+3
-0
src/nni_manager/training_service/common/gpuData.ts
src/nni_manager/training_service/common/gpuData.ts
+8
-1
src/nni_manager/training_service/common/util.ts
src/nni_manager/training_service/common/util.ts
+134
-1
src/nni_manager/training_service/local/gpuScheduler.ts
src/nni_manager/training_service/local/gpuScheduler.ts
+18
-21
src/nni_manager/training_service/local/localTrainingService.ts
...ni_manager/training_service/local/localTrainingService.ts
+45
-36
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
...ng_service/remote_machine/remoteMachineTrainingService.ts
+2
-2
No files found.
examples/trials/cifar10_pytorch/utils.py
View file @
0663218b
...
@@ -43,7 +43,7 @@ def init_params(net):
...
@@ -43,7 +43,7 @@ def init_params(net):
term_width
=
0
term_width
=
0
try
:
try
:
_
,
term_width
=
os
.
popen
(
'stty size'
,
'r'
).
read
().
split
()
term_width
=
os
.
get_terminal_size
().
columns
except
Exception
as
exception
:
except
Exception
as
exception
:
term_width
=
200
term_width
=
200
term_width
=
int
(
term_width
)
term_width
=
int
(
term_width
)
...
...
install.ps1
0 → 100644
View file @
0663218b
[
Net.ServicePointManager
]::
SecurityProtocol
=
[
Net.SecurityProtocolType
]::
Tls12
$install_node
=
$true
$install_yarn
=
$true
# nodejs
$nodeUrl
=
"https://aka.ms/nni/nodejs-download/win64"
$yarnUrl
=
"https://yarnpkg.com/latest.tar.gz"
$unzipNodeDir
=
"node-v*"
$unzipYarnDir
=
"yarn-v*"
$NNI_DEPENDENCY_FOLDER
=
"C:\tmp\
$
env
:
USERNAME
"
# Verify that a Python interpreter is on PATH and that its version is >= 3.5.
$WHICH_PYTHON = where.exe python
if ($null -eq $WHICH_PYTHON) {
    throw "Can not find python"
}
else {
    # `python -V` prints text such as "Python 3.7.2"; 2>&1 folds stderr into
    # the captured output so the text is available whichever stream is used.
    $pyVersion = [string](& python -V 2>&1)
    # Extract major.minor with a regex and compare as [version]. A fixed-width
    # substring plus a [double] comparison reads "3.10" as 3.1 and would
    # wrongly reject Python 3.10 and later.
    if ($pyVersion -notmatch '(\d+)\.(\d+)') {
        throw "Can not determine python version"
    }
    $pyVersion = [version]("{0}.{1}" -f $Matches[1], $Matches[2])
    if ($pyVersion -lt [version]"3.5") {
        throw "python version should >= 3.5"
    }
}
$WHICH_PIP
=
where.exe
pip
if
(
$WHICH_PIP
-eq
$null
){
throw
"Can not find pip"
}
$
env
:
PYTHONIOENCODING
=
"UTF-8"
if
(
$
env
:
VIRTUAL_ENV
){
$NNI_PYTHON3
=
$
env
:
VIRTUAL_ENV
+
"\Scripts"
$NNI_PKG_FOLDER
=
$
env
:
VIRTUAL_ENV
+
"\nni"
$NNI_PYTHON_SCRIPTS
=
$NNI_PYTHON3
}
else
{
$NNI_PYTHON3
=
$
(
python
-c
'import site; from pathlib import Path; print(Path(site.getsitepackages()[0]))'
)
$NNI_PKG_FOLDER
=
$NNI_PYTHON3
+
"\nni"
$NNI_PYTHON_SCRIPTS
=
$NNI_PYTHON3
+
"\Scripts"
}
$PIP_INSTALL
=
"""
$NNI_PYTHON3
\python"" -m pip install ."
if
(
!
(
Test-Path
$NNI_DEPENDENCY_FOLDER
)){
New-Item
$NNI_DEPENDENCY_FOLDER
-ItemType
Directory
}
$NNI_NODE_ZIP
=
$NNI_DEPENDENCY_FOLDER
+
"\nni-node.zip"
$NNI_NODE_FOLDER
=
$NNI_DEPENDENCY_FOLDER
+
"\nni-node"
$NNI_YARN_TARBALL
=
$NNI_DEPENDENCY_FOLDER
+
"\nni-yarn.tar.gz"
$NNI_YARN_FOLDER
=
$NNI_DEPENDENCY_FOLDER
+
"\nni-yarn"
$NNI_YARN
=
$NNI_YARN_FOLDER
+
"\bin\yarn"
## Version number
$NNI_VERSION_VALUE
=
$
(
git
describe
--tags
)
$NNI_VERSION_TEMPLATE
=
"999.0.0-developing"
if
(
!
(
Test-Path
$NNI_NODE_ZIP
)){
Write-Host
"Downloading Node..."
(
New-Object
Net.WebClient
)
.
DownloadFile
(
$nodeUrl
,
$NNI_NODE_ZIP
)
}
if
(
!
(
Test-Path
$NNI_YARN_TARBALL
)){
Write-Host
"Downloading Yarn..."
(
New-Object
Net.WebClient
)
.
DownloadFile
(
$yarnUrl
,
$NNI_YARN_TARBALL
)
}
$NNI_YARN_TARBALL
=
$NNI_YARN_TARBALL
-split
'\\'
-join
'\\'
$NNI_DEPENDENCY_FOLDER
=
$NNI_DEPENDENCY_FOLDER
-split
'\\'
-join
'\\'
$SCRIPT_PATH
=
$NNI_DEPENDENCY_FOLDER
+
'\extract.py'
$SCRIPT
=
"import tarfile"
,
(
"tar = tarfile.open(""{0}"")"
-f
$NNI_YARN_TARBALL
),
(
"tar.extractall(""{0}"")"
-f
$NNI_DEPENDENCY_FOLDER
),
"tar.close()"
[
System.IO.File
]::
WriteAllLines
(
$SCRIPT_PATH
,
$SCRIPT
)
Add-Type
-AssemblyName
System.IO.Compression.FileSystem
function Unzip {
    <#
    .SYNOPSIS
    Extract a zip archive into a directory.
    .PARAMETER zipfile
    Path of the zip archive to extract.
    .PARAMETER outpath
    Directory that receives the extracted contents.
    #>
    param(
        [string]$zipfile,
        [string]$outpath
    )
    # Delegates to the .NET ZipFile class.
    [System.IO.Compression.ZipFile]::ExtractToDirectory($zipfile, $outpath)
}
if
(
$install_node
)
{
### nodejs install
if
(
!
(
Test-Path
$NNI_NODE_FOLDER
)){
Unzip
$NNI_NODE_ZIP
$NNI_DEPENDENCY_FOLDER
$unzipNodeDir
=
Get-ChildItem
"
$NNI_DEPENDENCY_FOLDER
\
$unzipNodeDir
"
Rename-Item
$unzipNodeDir
"nni-node"
}
Copy-Item
"
$NNI_NODE_FOLDER
\node.exe"
$NNI_PYTHON_SCRIPTS
-Recurse
-Force
### yarn install
if
(
!
(
Test-Path
$NNI_YARN_FOLDER
)){
cmd
/C
"""
$NNI_PYTHON3
\python"""
$SCRIPT_PATH
$unzipYarnDir
=
Get-ChildItem
"
$NNI_DEPENDENCY_FOLDER
\
$unzipYarnDir
"
Rename-Item
$unzipYarnDir
"nni-yarn"
}
}
## install-python-modules:
### Installing Python SDK
(
Get-Content
setup.py
)
.
replace
(
$NNI_VERSION_TEMPLATE
,
$NNI_VERSION_VALUE
)
|
Set-Content
setup.py
cmd
/c
$PIP_INSTALL
# Building NNI Manager
$
env
:
PATH
=
$NNI_PYTHON_SCRIPTS
+
';'
+
$
env
:
PATH
cd
src\nni_manager
cmd
/c
$NNI_YARN
cmd
/c
$NNI_YARN
build
Copy-Item
config
-Destination
.
\dist\
-Recurse
-Force
# Building WebUI
cd
..
\webui
cmd
/c
$NNI_YARN
cmd
/c
$NNI_YARN
build
cd
..
\..
## install-node-modules
if
(
!
(
Test-Path
$NNI_PKG_FOLDER
)){
New-Item
$NNI_PKG_FOLDER
-ItemType
Directory
}
Remove-Item
$NNI_PKG_FOLDER
-Recurse
-Force
Copy-Item
"src\nni_manager\dist"
$NNI_PKG_FOLDER
-Recurse
Copy-Item
"src\nni_manager\package.json"
$NNI_PKG_FOLDER
$PKG_JSON
=
$NNI_PKG_FOLDER
+
"\package.json"
(
Get-Content
$PKG_JSON
)
.
replace
(
$NNI_VERSION_TEMPLATE
,
$NNI_VERSION_VALUE
)
|
Set-Content
$PKG_JSON
cmd
/c
$NNI_YARN
--prod
--cwd
$NNI_PKG_FOLDER
$NNI_PKG_FOLDER_STATIC
=
$NNI_PKG_FOLDER
+
"\static"
Copy-Item
"src\webui\build"
$NNI_PKG_FOLDER_STATIC
-Recurse
setup.py
View file @
0663218b
...
@@ -51,11 +51,12 @@ setup(
...
@@ -51,11 +51,12 @@ setup(
'json_tricks'
,
'json_tricks'
,
'numpy'
,
'numpy'
,
'psutil'
,
'psutil'
,
'
py
yaml'
,
'
ruamel.
yaml'
,
'requests'
,
'requests'
,
'scipy'
,
'scipy'
,
'schema'
,
'schema'
,
'PythonWebHDFS'
'PythonWebHDFS'
,
'colorama'
],
],
entry_points
=
{
entry_points
=
{
...
...
src/nni_manager/common/datastore.ts
View file @
0663218b
...
@@ -22,7 +22,7 @@
...
@@ -22,7 +22,7 @@
import
{
ExperimentProfile
,
TrialJobStatistics
}
from
'
./manager
'
;
import
{
ExperimentProfile
,
TrialJobStatistics
}
from
'
./manager
'
;
import
{
TrialJobDetail
,
TrialJobStatus
}
from
'
./trainingService
'
;
import
{
TrialJobDetail
,
TrialJobStatus
}
from
'
./trainingService
'
;
type
TrialJobEvent
=
TrialJobStatus
|
'
USER_TO_CANCEL
'
|
'
ADD_CUSTOMIZED
'
|
'
ADD_HYPERPARAMETER
'
;
type
TrialJobEvent
=
TrialJobStatus
|
'
USER_TO_CANCEL
'
|
'
ADD_CUSTOMIZED
'
|
'
ADD_HYPERPARAMETER
'
|
'
IMPORT_DATA
'
;
type
MetricType
=
'
PERIODICAL
'
|
'
FINAL
'
|
'
CUSTOM
'
|
'
REQUEST_PARAMETER
'
;
type
MetricType
=
'
PERIODICAL
'
|
'
FINAL
'
|
'
CUSTOM
'
|
'
REQUEST_PARAMETER
'
;
interface
ExperimentProfileRecord
{
interface
ExperimentProfileRecord
{
...
...
src/nni_manager/common/manager.ts
View file @
0663218b
...
@@ -99,6 +99,7 @@ abstract class Manager {
...
@@ -99,6 +99,7 @@ abstract class Manager {
public
abstract
stopExperiment
():
Promise
<
void
>
;
public
abstract
stopExperiment
():
Promise
<
void
>
;
public
abstract
getExperimentProfile
():
Promise
<
ExperimentProfile
>
;
public
abstract
getExperimentProfile
():
Promise
<
ExperimentProfile
>
;
public
abstract
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
;
public
abstract
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
;
public
abstract
importData
(
data
:
string
):
Promise
<
void
>
;
public
abstract
addCustomizedTrialJob
(
hyperParams
:
string
):
Promise
<
void
>
;
public
abstract
addCustomizedTrialJob
(
hyperParams
:
string
):
Promise
<
void
>
;
public
abstract
cancelTrialJobByUser
(
trialJobId
:
string
):
Promise
<
void
>
;
public
abstract
cancelTrialJobByUser
(
trialJobId
:
string
):
Promise
<
void
>
;
...
...
src/nni_manager/common/utils.ts
View file @
0663218b
...
@@ -22,6 +22,8 @@
...
@@ -22,6 +22,8 @@
import
*
as
assert
from
'
assert
'
;
import
*
as
assert
from
'
assert
'
;
import
{
randomBytes
}
from
'
crypto
'
;
import
{
randomBytes
}
from
'
crypto
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
cp
from
'
child_process
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
*
as
fs
from
'
fs
'
;
import
*
as
fs
from
'
fs
'
;
import
*
as
os
from
'
os
'
;
import
*
as
os
from
'
os
'
;
import
*
as
path
from
'
path
'
;
import
*
as
path
from
'
path
'
;
...
@@ -32,6 +34,7 @@ import * as util from 'util';
...
@@ -32,6 +34,7 @@ import * as util from 'util';
import
{
Database
,
DataStore
}
from
'
./datastore
'
;
import
{
Database
,
DataStore
}
from
'
./datastore
'
;
import
{
ExperimentStartupInfo
,
getExperimentId
,
getExperimentStartupInfo
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
ExperimentStartupInfo
,
getExperimentId
,
getExperimentStartupInfo
,
setExperimentStartupInfo
}
from
'
./experimentStartupInfo
'
;
import
{
Manager
}
from
'
./manager
'
;
import
{
Manager
}
from
'
./manager
'
;
import
{
TrialConfig
}
from
'
../training_service/common/trialConfig
'
;
import
{
HyperParameters
,
TrainingService
,
TrialJobStatus
}
from
'
./trainingService
'
;
import
{
HyperParameters
,
TrainingService
,
TrialJobStatus
}
from
'
./trainingService
'
;
import
{
getLogger
}
from
'
./log
'
;
import
{
getLogger
}
from
'
./log
'
;
...
@@ -146,6 +149,23 @@ function parseArg(names: string[]): string {
...
@@ -146,6 +149,23 @@ function parseArg(names: string[]): string {
return
''
;
return
''
;
}
}
/**
 * Serialize `args` to JSON for embedding in a command line.
 *
 * On win32 the value is JSON-encoded once. On every other platform it is
 * JSON-encoded twice, so the inner JSON text ends up wrapped in a quoted,
 * escaped string literal.
 *
 * @param args value to serialize
 * @returns the encoded text
 */
function encodeCmdLineArgs(args: any): any {
    const encodedOnce: string = JSON.stringify(args);

    return process.platform === 'win32' ? encodedOnce : JSON.stringify(encodedOnce);
}
/**
 * Name of the Python executable to launch:
 * `python` on win32, `python3` on every other platform.
 */
function getCmdPy(): string {
    return process.platform === 'win32' ? 'python' : 'python3';
}
/**
/**
* Generate command line to start automl algorithm(s),
* Generate command line to start automl algorithm(s),
* either start advisor or start a process which runs tuner and assessor
* either start advisor or start a process which runs tuner and assessor
...
@@ -179,8 +199,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
...
@@ -179,8 +199,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
if
(
!
tuner
&&
!
advisor
)
{
if
(
!
tuner
&&
!
advisor
)
{
throw
new
Error
(
'
Error: specify neither tuner nor advisor is not allowed
'
);
throw
new
Error
(
'
Error: specify neither tuner nor advisor is not allowed
'
);
}
}
let
command
:
string
=
`
${
getCmdPy
()}
-m nni`
;
let
command
:
string
=
`python3 -m nni`
;
if
(
multiPhase
)
{
if
(
multiPhase
)
{
command
+=
'
--multi_phase
'
;
command
+=
'
--multi_phase
'
;
}
}
...
@@ -192,7 +211,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
...
@@ -192,7 +211,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
if
(
advisor
)
{
if
(
advisor
)
{
command
+=
` --advisor_class_name
${
advisor
.
className
}
`
;
command
+=
` --advisor_class_name
${
advisor
.
className
}
`
;
if
(
advisor
.
classArgs
!==
undefined
)
{
if
(
advisor
.
classArgs
!==
undefined
)
{
command
+=
` --advisor_args
${
JSON
.
stringify
(
JSON
.
stringify
(
advisor
.
classArgs
)
)
}
`
;
command
+=
` --advisor_args
${
encodeCmdLineArgs
(
advisor
.
classArgs
)}
`
;
}
}
if
(
advisor
.
codeDir
!==
undefined
&&
advisor
.
codeDir
.
length
>
1
)
{
if
(
advisor
.
codeDir
!==
undefined
&&
advisor
.
codeDir
.
length
>
1
)
{
command
+=
` --advisor_directory
${
advisor
.
codeDir
}
`
;
command
+=
` --advisor_directory
${
advisor
.
codeDir
}
`
;
...
@@ -203,7 +222,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
...
@@ -203,7 +222,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
}
else
{
}
else
{
command
+=
` --tuner_class_name
${
tuner
.
className
}
`
;
command
+=
` --tuner_class_name
${
tuner
.
className
}
`
;
if
(
tuner
.
classArgs
!==
undefined
)
{
if
(
tuner
.
classArgs
!==
undefined
)
{
command
+=
` --tuner_args
${
JSON
.
stringify
(
JSON
.
stringify
(
tuner
.
classArgs
)
)
}
`
;
command
+=
` --tuner_args
${
encodeCmdLineArgs
(
tuner
.
classArgs
)}
`
;
}
}
if
(
tuner
.
codeDir
!==
undefined
&&
tuner
.
codeDir
.
length
>
1
)
{
if
(
tuner
.
codeDir
!==
undefined
&&
tuner
.
codeDir
.
length
>
1
)
{
command
+=
` --tuner_directory
${
tuner
.
codeDir
}
`
;
command
+=
` --tuner_directory
${
tuner
.
codeDir
}
`
;
...
@@ -215,7 +234,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
...
@@ -215,7 +234,7 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
if
(
assessor
!==
undefined
&&
assessor
.
className
!==
undefined
)
{
if
(
assessor
!==
undefined
&&
assessor
.
className
!==
undefined
)
{
command
+=
` --assessor_class_name
${
assessor
.
className
}
`
;
command
+=
` --assessor_class_name
${
assessor
.
className
}
`
;
if
(
assessor
.
classArgs
!==
undefined
)
{
if
(
assessor
.
classArgs
!==
undefined
)
{
command
+=
` --assessor_args
${
JSON
.
stringify
(
JSON
.
stringify
(
assessor
.
classArgs
)
)
}
`
;
command
+=
` --assessor_args
${
encodeCmdLineArgs
(
assessor
.
classArgs
)}
`
;
}
}
if
(
assessor
.
codeDir
!==
undefined
&&
assessor
.
codeDir
.
length
>
1
)
{
if
(
assessor
.
codeDir
!==
undefined
&&
assessor
.
codeDir
.
length
>
1
)
{
command
+=
` --assessor_directory
${
assessor
.
codeDir
}
`
;
command
+=
` --assessor_directory
${
assessor
.
codeDir
}
`
;
...
@@ -363,6 +382,83 @@ async function getVersion(): Promise<string> {
...
@@ -363,6 +382,83 @@ async function getVersion(): Promise<string> {
return
deferred
.
promise
;
return
deferred
.
promise
;
}
}
/**
 * Run `command` as a child process.
 *
 * On win32 the process is spawned without a shell: the command line is split
 * on spaces, the first token is the executable and the remaining tokens are
 * its arguments. On every other platform the command string is passed to
 * `spawn` unchanged with `shell: true`.
 *
 * @param command full command line to run
 * @param stdio stdio configuration passed to `spawn`
 * @param newCwd working directory of the child process
 * @param newEnv environment of the child process
 * @returns the spawned child process
 */
function getTunerProc(command: string, stdio: StdioOptions, newCwd: string, newEnv: any): ChildProcess {
    let cmd: string = command;
    let arg: string[] = [];
    let newShell: boolean = true;
    if (process.platform === 'win32') {
        // No shell on Windows, so the command line is tokenized here. Empty
        // tokens are dropped: a command without arguments, or with repeated,
        // leading or trailing spaces, would otherwise pass '' entries in argv.
        // NOTE(review): an argument that itself contains a space is still
        // split into several tokens - confirm callers never produce one.
        const tokens: string[] = command.split(' ').filter((token: string) => token.length > 0);
        cmd = tokens.length > 0 ? tokens[0] : command;
        arg = tokens.slice(1);
        newShell = false;
    }
    const tunerProc: ChildProcess = spawn(cmd, arg, {
        stdio,
        cwd: newCwd,
        env: newEnv,
        shell: newShell
    });

    return tunerProc;
}
/**
 * Report whether a process with the given pid exists.
 *
 * On win32 this runs PowerShell `Get-Process` synchronously and treats any
 * non-empty output as "alive". On other platforms it runs `kill -0`, which
 * succeeds only when the process exists. Any failure of the probe command
 * is reported as "not alive".
 *
 * @param pid id of the process to probe
 * @returns promise resolving to true when the process is alive
 */
async function isAlive(pid: any): Promise<boolean> {
    if (process.platform === 'win32') {
        try {
            const output: string = cp.execSync(`powershell.exe Get-Process -Id ${pid} -ErrorAction SilentlyContinue`).toString();

            return output.length > 0;
        } catch (error) {
            // probe failed: treat the process as not alive
            return false;
        }
    }

    try {
        await cpp.exec(`kill -0 ${pid}`);

        return true;
    } catch (error) {
        // kill -0 fails when the pid does not exist
        return false;
    }
}
/**
 * Forcefully kill the process with the given pid.
 *
 * Uses `taskkill /F` on win32 and `kill -9` elsewhere. Errors from the kill
 * command (for example, the pid no longer exists) are ignored, so the
 * returned promise always resolves.
 *
 * @param pid id of the process to kill
 */
async function killPid(pid: any): Promise<void> {
    try {
        const killCommand: string = process.platform === 'win32'
            ? `cmd /c taskkill /PID ${pid} /F`
            : `kill -9 ${pid}`;
        await cpp.exec(killCommand);
    } catch (error) {
        // pid does not exist, do nothing here
    }
}
/**
 * Line terminator for the current platform:
 * CRLF on win32, LF on every other platform.
 */
function getNewLine(): string {
    return process.platform === 'win32' ? '\r\n' : '\n';
}
export
{
countFilesRecursively
,
getRemoteTmpDir
,
generateParamFileName
,
getMsgDispatcherCommand
,
getCheckpointDir
,
export
{
countFilesRecursively
,
getRemoteTmpDir
,
generateParamFileName
,
getMsgDispatcherCommand
,
getCheckpointDir
,
getLogDir
,
getExperimentRootDir
,
getJobCancelStatus
,
getDefaultDatabaseDir
,
getIPV4Address
,
getLogDir
,
getExperimentRootDir
,
getJobCancelStatus
,
getDefaultDatabaseDir
,
getIPV4Address
,
mkDirP
,
delay
,
prepareUnitTest
,
parseArg
,
cleanupUnitTest
,
uniqueString
,
randomSelect
,
getLogLevel
,
getVersion
};
mkDirP
,
delay
,
prepareUnitTest
,
parseArg
,
cleanupUnitTest
,
uniqueString
,
randomSelect
,
getLogLevel
,
getVersion
,
getCmdPy
,
getTunerProc
,
isAlive
,
killPid
,
getNewLine
};
src/nni_manager/core/commands.ts
View file @
0663218b
...
@@ -22,6 +22,7 @@ const INITIALIZE = 'IN';
...
@@ -22,6 +22,7 @@ const INITIALIZE = 'IN';
const
REQUEST_TRIAL_JOBS
=
'
GE
'
;
const
REQUEST_TRIAL_JOBS
=
'
GE
'
;
const
REPORT_METRIC_DATA
=
'
ME
'
;
const
REPORT_METRIC_DATA
=
'
ME
'
;
const
UPDATE_SEARCH_SPACE
=
'
SS
'
;
const
UPDATE_SEARCH_SPACE
=
'
SS
'
;
// Command code for importing data.
const IMPORT_DATA = 'FD';
const
ADD_CUSTOMIZED_TRIAL_JOB
=
'
AD
'
;
const
ADD_CUSTOMIZED_TRIAL_JOB
=
'
AD
'
;
const
TRIAL_END
=
'
EN
'
;
const
TRIAL_END
=
'
EN
'
;
const
TERMINATE
=
'
TE
'
;
const
TERMINATE
=
'
TE
'
;
...
@@ -38,6 +39,7 @@ const TUNER_COMMANDS: Set<string> = new Set([
...
@@ -38,6 +39,7 @@ const TUNER_COMMANDS: Set<string> = new Set([
REQUEST_TRIAL_JOBS
,
REQUEST_TRIAL_JOBS
,
REPORT_METRIC_DATA
,
REPORT_METRIC_DATA
,
UPDATE_SEARCH_SPACE
,
UPDATE_SEARCH_SPACE
,
IMPORT_DATA
,
ADD_CUSTOMIZED_TRIAL_JOB
,
ADD_CUSTOMIZED_TRIAL_JOB
,
TERMINATE
,
TERMINATE
,
PING
,
PING
,
...
@@ -62,6 +64,7 @@ export {
...
@@ -62,6 +64,7 @@ export {
REQUEST_TRIAL_JOBS
,
REQUEST_TRIAL_JOBS
,
REPORT_METRIC_DATA
,
REPORT_METRIC_DATA
,
UPDATE_SEARCH_SPACE
,
UPDATE_SEARCH_SPACE
,
IMPORT_DATA
,
ADD_CUSTOMIZED_TRIAL_JOB
,
ADD_CUSTOMIZED_TRIAL_JOB
,
TRIAL_END
,
TRIAL_END
,
TERMINATE
,
TERMINATE
,
...
...
src/nni_manager/core/nnimanager.ts
View file @
0663218b
...
@@ -35,10 +35,10 @@ import {
...
@@ -35,10 +35,10 @@ import {
import
{
import
{
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
}
from
'
../common/trainingService
'
;
}
from
'
../common/trainingService
'
;
import
{
delay
,
getCheckpointDir
,
getExperimentRootDir
,
getLogDir
,
getMsgDispatcherCommand
,
mkDirP
,
get
LogLevel
}
from
'
../common/utils
'
;
import
{
delay
,
getCheckpointDir
,
getExperimentRootDir
,
getLogDir
,
getMsgDispatcherCommand
,
mkDirP
,
get
TunerProc
,
getLogLevel
,
isAlive
,
killPid
}
from
'
../common/utils
'
;
import
{
import
{
ADD_CUSTOMIZED_TRIAL_JOB
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
ADD_CUSTOMIZED_TRIAL_JOB
,
INITIALIZE
,
INITIALIZED
,
KILL_TRIAL_JOB
,
NEW_TRIAL_JOB
,
NO_MORE_TRIAL_JOBS
,
PING
,
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
SEND_TRIAL_JOB_PARAMETER
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
REPORT_METRIC_DATA
,
REQUEST_TRIAL_JOBS
,
SEND_TRIAL_JOB_PARAMETER
,
TERMINATE
,
TRIAL_END
,
UPDATE_SEARCH_SPACE
,
IMPORT_DATA
}
from
'
./commands
'
;
}
from
'
./commands
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
./ipcInterface
'
;
...
@@ -99,6 +99,17 @@ class NNIManager implements Manager {
...
@@ -99,6 +99,17 @@ class NNIManager implements Manager {
return
this
.
storeExperimentProfile
();
return
this
.
storeExperimentProfile
();
}
}
/**
 * Send `data` to the dispatcher as an IMPORT_DATA command and record the
 * import as a trial job event in the data store.
 *
 * @param data payload to import
 * @returns the promise returned by the data store; rejects with an Error
 *          when the dispatcher has not been set up yet
 */
public importData(data: string): Promise<void> {
    const dispatcher = this.dispatcher;
    if (dispatcher === undefined) {
        return Promise.reject(new Error('tuner has not been setup'));
    }
    dispatcher.sendCommand(IMPORT_DATA, data);

    return this.dataStore.storeTrialJobEvent('IMPORT_DATA', '', data);
}
public
addCustomizedTrialJob
(
hyperParams
:
string
):
Promise
<
void
>
{
public
addCustomizedTrialJob
(
hyperParams
:
string
):
Promise
<
void
>
{
if
(
this
.
currSubmittedTrialNum
>=
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
if
(
this
.
currSubmittedTrialNum
>=
this
.
experimentProfile
.
params
.
maxTrialNum
)
{
return
Promise
.
reject
(
return
Promise
.
reject
(
...
@@ -290,12 +301,7 @@ class NNIManager implements Manager {
...
@@ -290,12 +301,7 @@ class NNIManager implements Manager {
NNI_INCLUDE_INTERMEDIATE_RESULTS
:
includeIntermediateResultsEnv
NNI_INCLUDE_INTERMEDIATE_RESULTS
:
includeIntermediateResultsEnv
};
};
let
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
let
newEnv
=
Object
.
assign
({},
process
.
env
,
nniEnv
);
const
tunerProc
:
ChildProcess
=
spawn
(
command
,
[],
{
const
tunerProc
:
ChildProcess
=
getTunerProc
(
command
,
stdio
,
newCwd
,
newEnv
);
stdio
,
cwd
:
newCwd
,
env
:
newEnv
,
shell
:
true
});
this
.
dispatcherPid
=
tunerProc
.
pid
;
this
.
dispatcherPid
=
tunerProc
.
pid
;
this
.
dispatcher
=
createDispatcherInterface
(
tunerProc
);
this
.
dispatcher
=
createDispatcherInterface
(
tunerProc
);
...
@@ -341,16 +347,10 @@ class NNIManager implements Manager {
...
@@ -341,16 +347,10 @@ class NNIManager implements Manager {
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
// gracefully terminate tuner and assessor here, wait at most 30 seconds.
for
(
let
i
:
number
=
0
;
i
<
30
;
i
++
)
{
for
(
let
i
:
number
=
0
;
i
<
30
;
i
++
)
{
if
(
!
tunerAlive
)
{
break
;
}
if
(
!
tunerAlive
)
{
break
;
}
try
{
tunerAlive
=
await
isAlive
(
this
.
dispatcherPid
);
await
cpp
.
exec
(
`kill -0
${
this
.
dispatcherPid
}
`
);
}
catch
(
error
)
{
tunerAlive
=
false
;
}
await
delay
(
1000
);
await
delay
(
1000
);
}
}
try
{
await
killPid
(
this
.
dispatcherPid
);
await
cpp
.
exec
(
`kill -9
${
this
.
dispatcherPid
}
`
);
}
catch
(
error
)
{
// this.tunerPid does not exist, do nothing here
}
const
trialJobList
:
TrialJobDetail
[]
=
await
this
.
trainingService
.
listTrialJobs
();
const
trialJobList
:
TrialJobDetail
[]
=
await
this
.
trainingService
.
listTrialJobs
();
// TO DO: to promise all
// TO DO: to promise all
for
(
const
trialJob
of
trialJobList
)
{
for
(
const
trialJob
of
trialJobList
)
{
...
...
src/nni_manager/core/test/dataStore.test.ts
View file @
0663218b
...
@@ -42,6 +42,7 @@ describe('Unit test for dataStore', () => {
...
@@ -42,6 +42,7 @@ describe('Unit test for dataStore', () => {
});
});
after
(()
=>
{
after
(()
=>
{
ds
.
close
();
cleanupUnitTest
();
cleanupUnitTest
();
});
});
...
...
src/nni_manager/core/test/ipcInterface.test.ts
View file @
0663218b
...
@@ -18,11 +18,10 @@
...
@@ -18,11 +18,10 @@
*/
*/
'
use strict
'
;
'
use strict
'
;
import
*
as
assert
from
'
assert
'
;
import
*
as
assert
from
'
assert
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
}
from
'
../../common/utils
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
,
getTunerProc
,
getCmdPy
}
from
'
../../common/utils
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
import
{
NNIError
}
from
'
../../common/errors
'
;
import
{
NNIError
}
from
'
../../common/errors
'
;
...
@@ -39,15 +38,21 @@ function runProcess(): Promise<Error | null> {
...
@@ -39,15 +38,21 @@ function runProcess(): Promise<Error | null> {
// create fake assessor process
// create fake assessor process
const
stdio
:
StdioOptions
=
[
'
ignore
'
,
'
pipe
'
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
const
stdio
:
StdioOptions
=
[
'
ignore
'
,
'
pipe
'
,
process
.
stderr
,
'
pipe
'
,
'
pipe
'
];
const
proc
:
ChildProcess
=
spawn
(
'
python3 assessor.py
'
,
[],
{
stdio
,
cwd
:
'
core/test
'
,
shell
:
true
})
;
const
command
:
string
=
getCmdPy
()
+
'
assessor.py
'
;
const
proc
:
ChildProcess
=
getTunerProc
(
command
,
stdio
,
'
core/test
'
,
process
.
env
);
// record its sent/received commands on exit
// record its sent/received commands on exit
proc
.
on
(
'
error
'
,
(
error
:
Error
):
void
=>
{
deferred
.
resolve
(
error
);
});
proc
.
on
(
'
error
'
,
(
error
:
Error
):
void
=>
{
deferred
.
resolve
(
error
);
});
proc
.
on
(
'
exit
'
,
(
code
:
number
):
void
=>
{
proc
.
on
(
'
exit
'
,
(
code
:
number
):
void
=>
{
if
(
code
!==
0
)
{
if
(
code
!==
0
)
{
deferred
.
resolve
(
new
Error
(
`return code:
${
code
}
`
));
deferred
.
resolve
(
new
Error
(
`return code:
${
code
}
`
));
}
else
{
}
else
{
sentCommands
=
proc
.
stdout
.
read
().
toString
().
split
(
'
\n
'
);
let
str
=
proc
.
stdout
.
read
().
toString
();
if
(
str
.
search
(
"
\r\n
"
)
!=-
1
){
sentCommands
=
str
.
split
(
"
\r\n
"
);
}
else
{
sentCommands
=
str
.
split
(
'
\n
'
);
}
deferred
.
resolve
(
null
);
deferred
.
resolve
(
null
);
}
}
});
});
...
...
src/nni_manager/core/test/ipcInterfaceTerminate.test.ts
View file @
0663218b
...
@@ -22,7 +22,7 @@
...
@@ -22,7 +22,7 @@
import
*
as
assert
from
'
assert
'
;
import
*
as
assert
from
'
assert
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
{
ChildProcess
,
spawn
,
StdioOptions
}
from
'
child_process
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
,
getMsgDispatcherCommand
}
from
'
../../common/utils
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
,
getMsgDispatcherCommand
,
getTunerProc
}
from
'
../../common/utils
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
*
as
CommandType
from
'
../commands
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
import
{
createDispatcherInterface
,
IpcInterface
}
from
'
../ipcInterface
'
;
...
@@ -50,9 +50,7 @@ function startProcess(): void {
...
@@ -50,9 +50,7 @@ function startProcess(): void {
// advisor
// advisor
undefined
undefined
);
);
const
proc
:
ChildProcess
=
getTunerProc
(
dispatcherCmd
,
stdio
,
'
core/test
'
,
process
.
env
);
const
proc
:
ChildProcess
=
spawn
(
dispatcherCmd
,
[],
{
stdio
,
cwd
:
'
core/test
'
,
shell
:
true
});
proc
.
on
(
'
error
'
,
(
error
:
Error
):
void
=>
{
proc
.
on
(
'
error
'
,
(
error
:
Error
):
void
=>
{
procExit
=
true
;
procExit
=
true
;
procError
=
true
;
procError
=
true
;
...
...
src/nni_manager/core/test/nnimanager.test.ts
View file @
0663218b
...
@@ -33,6 +33,7 @@ import { NNIManager } from '../nnimanager';
...
@@ -33,6 +33,7 @@ import { NNIManager } from '../nnimanager';
import
{
SqlDB
}
from
'
../sqlDatabase
'
;
import
{
SqlDB
}
from
'
../sqlDatabase
'
;
import
{
MockedTrainingService
}
from
'
./mockedTrainingService
'
;
import
{
MockedTrainingService
}
from
'
./mockedTrainingService
'
;
import
{
MockedDataStore
}
from
'
./mockedDatastore
'
;
import
{
MockedDataStore
}
from
'
./mockedDatastore
'
;
import
*
as
path
from
'
path
'
;
async
function
initContainer
():
Promise
<
void
>
{
async
function
initContainer
():
Promise
<
void
>
{
prepareUnitTest
();
prepareUnitTest
();
...
@@ -183,7 +184,7 @@ describe('Unit test for nnimanager', function () {
...
@@ -183,7 +184,7 @@ describe('Unit test for nnimanager', function () {
it
(
'
test getExperimentProfile
'
,
()
=>
{
it
(
'
test getExperimentProfile
'
,
()
=>
{
return
nniManager
.
getExperimentProfile
().
then
((
experimentProfile
)
=>
{
return
nniManager
.
getExperimentProfile
().
then
((
experimentProfile
)
=>
{
expect
(
experimentProfile
.
id
).
to
.
be
.
equal
(
'
unittest
'
);
expect
(
experimentProfile
.
id
).
to
.
be
.
equal
(
'
unittest
'
);
expect
(
experimentProfile
.
logDir
).
to
.
be
.
equal
(
os
.
homedir
()
+
'
/nni/
experiments
/
unittest
'
);
expect
(
experimentProfile
.
logDir
).
to
.
be
.
equal
(
path
.
join
(
os
.
homedir
()
,
'
nni
'
,
'
experiments
'
,
'
unittest
'
)
)
;
}).
catch
((
error
)
=>
{
}).
catch
((
error
)
=>
{
assert
.
fail
(
error
);
assert
.
fail
(
error
);
...
...
src/nni_manager/package.json
View file @
0663218b
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
"version"
:
"999.0.0-developing"
,
"version"
:
"999.0.0-developing"
,
"main"
:
"index.js"
,
"main"
:
"index.js"
,
"scripts"
:
{
"scripts"
:
{
"postbuild"
:
"cp -rf config ./dist/"
,
"build"
:
"tsc"
,
"build"
:
"tsc"
,
"test"
:
"nyc mocha -r ts-node/register -t 15000 --recursive **/*.test.ts --exclude node_modules/**/**/*.test.ts --colors"
,
"test"
:
"nyc mocha -r ts-node/register -t 15000 --recursive **/*.test.ts --exclude node_modules/**/**/*.test.ts --colors"
,
"start"
:
"node dist/main.js"
,
"start"
:
"node dist/main.js"
,
...
@@ -35,7 +34,7 @@
...
@@ -35,7 +34,7 @@
"@types/express"
:
"^4.16.0"
,
"@types/express"
:
"^4.16.0"
,
"@types/glob"
:
"^7.1.1"
,
"@types/glob"
:
"^7.1.1"
,
"@types/mocha"
:
"^5.2.5"
,
"@types/mocha"
:
"^5.2.5"
,
"@types/node"
:
"
^
10.12.18"
,
"@types/node"
:
"10.12.18"
,
"@types/request"
:
"^2.47.1"
,
"@types/request"
:
"^2.47.1"
,
"@types/rx"
:
"^4.1.1"
,
"@types/rx"
:
"^4.1.1"
,
"@types/sqlite3"
:
"^3.1.3"
,
"@types/sqlite3"
:
"^3.1.3"
,
...
...
src/nni_manager/rest_server/restHandler.ts
View file @
0663218b
...
@@ -63,6 +63,7 @@ class NNIRestHandler {
...
@@ -63,6 +63,7 @@ class NNIRestHandler {
this
.
checkStatus
(
router
);
this
.
checkStatus
(
router
);
this
.
getExperimentProfile
(
router
);
this
.
getExperimentProfile
(
router
);
this
.
updateExperimentProfile
(
router
);
this
.
updateExperimentProfile
(
router
);
this
.
importData
(
router
);
this
.
startExperiment
(
router
);
this
.
startExperiment
(
router
);
this
.
getTrialJobStatistics
(
router
);
this
.
getTrialJobStatistics
(
router
);
this
.
setClusterMetaData
(
router
);
this
.
setClusterMetaData
(
router
);
...
@@ -145,6 +146,16 @@ class NNIRestHandler {
...
@@ -145,6 +146,16 @@ class NNIRestHandler {
});
});
}
}
/**
 * Register `POST /experiment/import-data`.
 * The request body is serialized to JSON and handed to the NNI manager.
 * On success an empty response is sent; on failure the error goes to
 * `handle_error`.
 */
private importData(router: Router): void {
    router.post('/experiment/import-data', (req: Request, res: Response) => {
        const payload: string = JSON.stringify(req.body);
        this.nniManager.importData(payload)
            .then(() => {
                res.send();
            })
            .catch((err: Error) => {
                this.handle_error(err, res);
            });
    });
}
private
startExperiment
(
router
:
Router
):
void
{
private
startExperiment
(
router
:
Router
):
void
{
router
.
post
(
'
/experiment
'
,
expressJoi
(
ValidationSchemas
.
STARTEXPERIMENT
),
(
req
:
Request
,
res
:
Response
)
=>
{
router
.
post
(
'
/experiment
'
,
expressJoi
(
ValidationSchemas
.
STARTEXPERIMENT
),
(
req
:
Request
,
res
:
Response
)
=>
{
if
(
isNewExperiment
())
{
if
(
isNewExperiment
())
{
...
...
src/nni_manager/rest_server/test/mockedNNIManager.ts
View file @
0663218b
...
@@ -46,6 +46,9 @@ export class MockedNNIManager extends Manager {
...
@@ -46,6 +46,9 @@ export class MockedNNIManager extends Manager {
public
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
{
public
updateExperimentProfile
(
experimentProfile
:
ExperimentProfile
,
updateType
:
ProfileUpdateType
):
Promise
<
void
>
{
return
Promise
.
resolve
();
return
Promise
.
resolve
();
}
}
/**
 * Mocked importData: ignores the supplied data and resolves immediately.
 * @param data serialized data to import (unused by the mock)
 */
public importData(data: string): Promise<void> {
    const done: Promise<void> = Promise.resolve();

    return done;
}
public
getTrialJobStatistics
():
Promise
<
TrialJobStatistics
[]
>
{
public
getTrialJobStatistics
():
Promise
<
TrialJobStatistics
[]
>
{
const
deferred
:
Deferred
<
TrialJobStatistics
[]
>
=
new
Deferred
<
TrialJobStatistics
[]
>
();
const
deferred
:
Deferred
<
TrialJobStatistics
[]
>
=
new
Deferred
<
TrialJobStatistics
[]
>
();
deferred
.
resolve
([{
deferred
.
resolve
([{
...
...
src/nni_manager/training_service/common/gpuData.ts
View file @
0663218b
...
@@ -59,10 +59,17 @@ export class GPUSummary {
...
@@ -59,10 +59,17 @@ export class GPUSummary {
}
}
}
}
export
const
GPU_INFO_COLLECTOR_FORMAT
:
string
=
export
const
GPU_INFO_COLLECTOR_FORMAT
_LINUX
:
string
=
`
`
#!/bin/bash
#!/bin/bash
export METRIC_OUTPUT_DIR={0}
export METRIC_OUTPUT_DIR={0}
echo $$ >{1}
echo $$ >{1}
python3 -m nni_gpu_tool.gpu_metrics_collector
python3 -m nni_gpu_tool.gpu_metrics_collector
`
`
/**
 * PowerShell template of the GPU metrics collector script for Windows.
 * Placeholders are filled with String.Format:
 *   {0} - value assigned to the METRIC_OUTPUT_DIR environment variable
 *   {1} - file that receives the ID of the started python process
 * NOTE(review): the ID is written with `Out-File -encoding utf8`; depending on the
 * PowerShell version this may prepend a byte-order mark - confirm that readers of
 * the file strip it.
 */
export const GPU_INFO_COLLECTOR_FORMAT_WINDOWS: string =
`
$env:METRIC_OUTPUT_DIR="{0}"
$app = Start-Process "python" -ArgumentList "-m nni_gpu_tool.gpu_metrics_collector" -passthru -NoNewWindow
Write $app.ID | Out-File {1} -NoNewline -encoding utf8
`
\ No newline at end of file
src/nni_manager/training_service/common/util.ts
View file @
0663218b
...
@@ -22,6 +22,12 @@ import { getLogger } from "common/log";
...
@@ -22,6 +22,12 @@ import { getLogger } from "common/log";
'
use strict
'
;
'
use strict
'
;
import
{
countFilesRecursively
}
from
'
../../common/utils
'
import
{
countFilesRecursively
}
from
'
../../common/utils
'
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
cp
from
'
child_process
'
;
import
{
GPU_INFO_COLLECTOR_FORMAT_LINUX
,
GPU_INFO_COLLECTOR_FORMAT_WINDOWS
}
from
'
./gpuData
'
import
*
as
path
from
'
path
'
;
import
{
String
}
from
'
typescript-string-operations
'
;
import
{
file
}
from
"
../../node_modules/@types/tmp
"
;
/**
/**
* Validate codeDir, calculate file count recursively under codeDir, and throw error if any rule is broken
* Validate codeDir, calculate file count recursively under codeDir, and throw error if any rule is broken
...
@@ -46,3 +52,130 @@ export async function validateCodeDir(codeDir: string) : Promise<number> {
...
@@ -46,3 +52,130 @@ export async function validateCodeDir(codeDir: string) : Promise<number> {
return
fileCount
;
return
fileCount
;
}
}
/**
 * Create a new directory by shelling out to the platform's tool:
 * PowerShell `New-Item ... -Force` on win32, `mkdir -p` everywhere else.
 * The returned promise rejects if the command fails.
 * @param directory path of the directory to create
 */
export async function execMkdir(directory: string): Promise<void> {
    const command: string = process.platform === 'win32'
        ? `powershell.exe New-Item -Path ${directory} -ItemType "directory" -Force`
        : `mkdir -p ${directory}`;
    await cpp.exec(command);
}
/**
 * Create a new file by shelling out to the platform's tool:
 * PowerShell `New-Item ... -Force` on win32, `touch` everywhere else.
 * The returned promise rejects if the command fails.
 * @param filename path of the file to create
 */
export async function execNewFile(filename: string): Promise<void> {
    const command: string = process.platform === 'win32'
        ? `powershell.exe New-Item -Path ${filename} -ItemType "file" -Force`
        : `touch ${filename}`;
    await cpp.exec(command);
}
/**
 * Launch a script file with the platform's interpreter
 * (`powershell.exe -file` on win32, `bash` otherwise) without waiting for it.
 * @param filePath path of the script to run
 * @returns the spawned child process
 */
export function execScript(filePath: string): cp.ChildProcess {
    const launcher: string = process.platform === 'win32' ? 'powershell.exe -file' : 'bash';

    return cp.exec(`${launcher} ${filePath}`);
}
/**
 * Read the last line of a file by shelling out to
 * `Get-Content -Tail 1` on win32 or `tail -n 1` otherwise.
 * @param filePath path of the file to read
 * @returns the result of the executed command; the line is in its stdout
 */
export async function execTail(filePath: string): Promise<cpp.childProcessPromise.Result> {
    const command: string = process.platform === 'win32'
        ? `powershell.exe Get-Content ${filePath} -Tail 1`
        : `tail -n 1 ${filePath}`;
    const tailResult: cpp.childProcessPromise.Result = await cpp.exec(command);

    return tailResult;
}
/**
 * Delete a directory together with everything inside it.
 * The returned promise rejects if the underlying command fails.
 * @param directory path of the directory to delete
 */
export async function execRemove(directory: string): Promise<void> {
    if (process.platform === 'win32') {
        // -Recurse -Force makes this branch match `rm -rf` below: without them
        // Remove-Item does not delete a non-empty directory non-interactively.
        await cpp.exec(`powershell.exe Remove-Item ${directory} -Recurse -Force`);
    } else {
        await cpp.exec(`rm -rf ${directory}`);
    }
}
/**
 * Kill a process: `taskkill /T /F` on win32 (the process and its child tree),
 * `pkill -P` otherwise (the children of the given process).
 * @param pid process id as text, e.g. the raw content of a pid file
 * @throws Error (as a rejected promise) if pid is not a decimal number
 */
export async function execKill(pid: string): Promise<void> {
    // A pid read from a file may carry a trailing newline or a UTF-8 byte-order
    // mark; trim() removes both before the value is placed in a shell command.
    const cleanPid: string = pid.trim();
    if (!/^\d+$/.test(cleanPid)) {
        // Refuse to interpolate anything that is not a plain number into the shell.
        throw new Error(`Invalid pid: ${pid}`);
    }
    if (process.platform === 'win32') {
        await cpp.exec(`cmd /c taskkill /PID ${cleanPid} /T /F`);
    } else {
        await cpp.exec(`pkill -P ${cleanPid}`);
    }
}
/**
 * Build the script line that sets one environment variable:
 * `$env:KEY="VALUE"` on win32, `export KEY=VALUE` otherwise.
 * @param variable key/value pair to set
 * @returns command string
 */
export function setEnvironmentVariable(variable: { key: string; value: string }): string {
    const { key, value } = variable;
    const isWindows: boolean = process.platform === 'win32';

    return isWindows ? `$env:${key}="${value}"` : `export ${key}=${value}`;
}
/**
 * Build a script file name by appending the platform's extension:
 * `.ps1` on win32, `.sh` otherwise.
 * @param fileNamePrefix file name without extension
 * @returns file name with extension
 */
export function getScriptName(fileNamePrefix: string): string {
    const extension: string = process.platform === 'win32' ? '.ps1' : '.sh';

    return fileNamePrefix + extension;
}
/**
 * Generate the content of the GPU metrics collector script for the current platform.
 * The platform template is filled with the folder itself ({0}) and the path of
 * the `pid` file inside that folder ({1}).
 * @param gpuMetricCollectorScriptFolder folder used by the collector script
 * @returns script content
 */
export function getgpuMetricsCollectorScriptContent(gpuMetricCollectorScriptFolder: string): string {
    const template: string = process.platform === 'win32'
        ? GPU_INFO_COLLECTOR_FORMAT_WINDOWS
        : GPU_INFO_COLLECTOR_FORMAT_LINUX;
    const pidFilePath: string = path.join(gpuMetricCollectorScriptFolder, 'pid');

    return String.Format(template, gpuMetricCollectorScriptFolder, pidFilePath);
}
src/nni_manager/training_service/local/gpuScheduler.ts
View file @
0663218b
...
@@ -25,9 +25,10 @@ import * as fs from 'fs';
...
@@ -25,9 +25,10 @@ import * as fs from 'fs';
import
*
as
os
from
'
os
'
;
import
*
as
os
from
'
os
'
;
import
*
as
path
from
'
path
'
;
import
*
as
path
from
'
path
'
;
import
{
String
}
from
'
typescript-string-operations
'
;
import
{
String
}
from
'
typescript-string-operations
'
;
import
{
execMkdir
,
getScriptName
,
getgpuMetricsCollectorScriptContent
,
execScript
,
execTail
,
execRemove
,
execKill
}
from
'
../common/util
'
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
delay
}
from
'
../../common/utils
'
;
import
{
delay
}
from
'
../../common/utils
'
;
import
{
GPU_INFO_COLLECTOR_FORMAT
,
GPUInfo
,
GPUSummary
}
from
'
../common/gpuData
'
;
import
{
GPUInfo
,
GPUSummary
}
from
'
../common/gpuData
'
;
/**
/**
* GPUScheduler for local training service
* GPUScheduler for local training service
...
@@ -57,6 +58,19 @@ class GPUScheduler {
...
@@ -57,6 +58,19 @@ class GPUScheduler {
}
}
}
}
/**
 * Create the GPU metric collector folder, write the platform-specific
 * collector script into it and launch the script.
 * The launched script is not awaited.
 */
private async runGpuMetricsCollectorScript(): Promise<void> {
    await execMkdir(this.gpuMetricCollectorScriptFolder);
    // generate gpu_metrics_collector script
    const scriptFileName: string = getScriptName('gpu_metrics_collector');
    const scriptPath: string = path.join(this.gpuMetricCollectorScriptFolder, scriptFileName);
    const scriptContent: string = getgpuMetricsCollectorScriptContent(this.gpuMetricCollectorScriptFolder);
    await fs.promises.writeFile(scriptPath, scriptContent, { encoding: 'utf8' });
    execScript(scriptPath);
}
public
getAvailableGPUIndices
():
number
[]
{
public
getAvailableGPUIndices
():
number
[]
{
if
(
this
.
gpuSummary
!==
undefined
)
{
if
(
this
.
gpuSummary
!==
undefined
)
{
return
this
.
gpuSummary
.
gpuInfos
.
filter
((
info
:
GPUInfo
)
=>
info
.
activeProcessNum
===
0
)
return
this
.
gpuSummary
.
gpuInfos
.
filter
((
info
:
GPUInfo
)
=>
info
.
activeProcessNum
===
0
)
...
@@ -78,33 +92,16 @@ class GPUScheduler {
...
@@ -78,33 +92,16 @@ class GPUScheduler {
this
.
stopping
=
true
;
this
.
stopping
=
true
;
try
{
try
{
const
pid
:
string
=
await
fs
.
promises
.
readFile
(
path
.
join
(
this
.
gpuMetricCollectorScriptFolder
,
'
pid
'
),
'
utf8
'
);
const
pid
:
string
=
await
fs
.
promises
.
readFile
(
path
.
join
(
this
.
gpuMetricCollectorScriptFolder
,
'
pid
'
),
'
utf8
'
);
await
cpp
.
exec
(
`pkill -P
${
pid
}
`
);
await
execKill
(
pid
);
await
cpp
.
exec
(
`rm -rf
${
this
.
gpuMetricCollectorScriptFolder
}
`
);
await
execRemove
(
this
.
gpuMetricCollectorScriptFolder
);
}
catch
(
error
)
{
}
catch
(
error
)
{
this
.
log
.
error
(
`GPU scheduler error:
${
error
}
`
);
this
.
log
.
error
(
`GPU scheduler error:
${
error
}
`
);
}
}
}
}
/**
* Generate gpu metric collector shell script in local machine,
* used to run in remote machine, and will be deleted after uploaded from local.
*/
private
async
runGpuMetricsCollectorScript
():
Promise
<
void
>
{
await
cpp
.
exec
(
`mkdir -p
${
this
.
gpuMetricCollectorScriptFolder
}
`
);
//generate gpu_metrics_collector.sh
const
gpuMetricsCollectorScriptPath
:
string
=
path
.
join
(
this
.
gpuMetricCollectorScriptFolder
,
'
gpu_metrics_collector.sh
'
);
const
gpuMetricsCollectorScriptContent
:
string
=
String
.
Format
(
GPU_INFO_COLLECTOR_FORMAT
,
this
.
gpuMetricCollectorScriptFolder
,
path
.
join
(
this
.
gpuMetricCollectorScriptFolder
,
'
pid
'
)
);
await
fs
.
promises
.
writeFile
(
gpuMetricsCollectorScriptPath
,
gpuMetricsCollectorScriptContent
,
{
encoding
:
'
utf8
'
});
cp
.
exec
(
`bash
${
gpuMetricsCollectorScriptPath
}
`
);
}
private
async
updateGPUSummary
():
Promise
<
void
>
{
private
async
updateGPUSummary
():
Promise
<
void
>
{
const
cmdresult
:
cpp
.
childProcessPromise
.
Result
=
const
cmdresult
:
cpp
.
childProcessPromise
.
Result
=
await
cpp
.
exec
(
`tail -n 1
${
path
.
join
(
this
.
gpuMetricCollectorScriptFolder
,
'
gpu_metrics
'
)
}
`
);
await
execTail
(
path
.
join
(
this
.
gpuMetricCollectorScriptFolder
,
'
gpu_metrics
'
));
if
(
cmdresult
&&
cmdresult
.
stdout
)
{
if
(
cmdresult
&&
cmdresult
.
stdout
)
{
this
.
gpuSummary
=
<
GPUSummary
>
JSON
.
parse
(
cmdresult
.
stdout
);
this
.
gpuSummary
=
<
GPUSummary
>
JSON
.
parse
(
cmdresult
.
stdout
);
}
else
{
}
else
{
...
...
src/nni_manager/training_service/local/localTrainingService.ts
View file @
0663218b
...
@@ -18,7 +18,6 @@
...
@@ -18,7 +18,6 @@
*/
*/
'
use strict
'
;
'
use strict
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
cp
from
'
child_process
'
;
import
*
as
cp
from
'
child_process
'
;
import
{
EventEmitter
}
from
'
events
'
;
import
{
EventEmitter
}
from
'
events
'
;
...
@@ -32,7 +31,8 @@ import {
...
@@ -32,7 +31,8 @@ import {
HostJobApplicationForm
,
HyperParameters
,
JobApplicationForm
,
TrainingService
,
TrialJobApplicationForm
,
HostJobApplicationForm
,
HyperParameters
,
JobApplicationForm
,
TrainingService
,
TrialJobApplicationForm
,
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
TrialJobDetail
,
TrialJobMetric
,
TrialJobStatus
}
from
'
../../common/trainingService
'
;
}
from
'
../../common/trainingService
'
;
import
{
delay
,
generateParamFileName
,
getExperimentRootDir
,
getJobCancelStatus
,
uniqueString
}
from
'
../../common/utils
'
;
import
{
delay
,
generateParamFileName
,
getExperimentRootDir
,
getJobCancelStatus
,
uniqueString
,
isAlive
,
getNewLine
}
from
'
../../common/utils
'
;
import
{
execMkdir
,
getScriptName
,
execScript
,
setEnvironmentVariable
,
execNewFile
}
from
'
../common/util
'
import
{
TrialConfig
}
from
'
../common/trialConfig
'
;
import
{
TrialConfig
}
from
'
../common/trialConfig
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
import
{
TrialConfigMetadataKey
}
from
'
../common/trialConfigMetadataKey
'
;
import
{
GPUScheduler
}
from
'
./gpuScheduler
'
;
import
{
GPUScheduler
}
from
'
./gpuScheduler
'
;
...
@@ -169,14 +169,7 @@ class LocalTrainingService implements TrainingService {
...
@@ -169,14 +169,7 @@ class LocalTrainingService implements TrainingService {
return
this
.
getHostJob
(
trialJobId
);
return
this
.
getHostJob
(
trialJobId
);
}
}
if
(
trialJob
.
status
===
'
RUNNING
'
)
{
if
(
trialJob
.
status
===
'
RUNNING
'
)
{
let
alive
:
boolean
=
false
;
let
alive
:
boolean
=
await
isAlive
(
trialJob
.
pid
);
try
{
await
cpp
.
exec
(
`kill -0
${
trialJob
.
pid
}
`
);
alive
=
true
;
}
catch
(
error
)
{
//ignore
}
if
(
!
alive
)
{
if
(
!
alive
)
{
trialJob
.
endTime
=
Date
.
now
();
trialJob
.
endTime
=
Date
.
now
();
this
.
setTrialJobStatus
(
trialJob
,
'
FAILED
'
);
this
.
setTrialJobStatus
(
trialJob
,
'
FAILED
'
);
...
@@ -284,7 +277,9 @@ class LocalTrainingService implements TrainingService {
...
@@ -284,7 +277,9 @@ class LocalTrainingService implements TrainingService {
public
async
setClusterMetadata
(
key
:
string
,
value
:
string
):
Promise
<
void
>
{
public
async
setClusterMetadata
(
key
:
string
,
value
:
string
):
Promise
<
void
>
{
if
(
!
this
.
initialized
)
{
if
(
!
this
.
initialized
)
{
this
.
rootDir
=
getExperimentRootDir
();
this
.
rootDir
=
getExperimentRootDir
();
await
cpp
.
exec
(
`mkdir -p
${
this
.
rootDir
}
`
);
if
(
!
fs
.
existsSync
(
this
.
rootDir
)){
await
cpp
.
exec
(
`powershell.exe mkdir
${
this
.
rootDir
}
`
);
}
this
.
initialized
=
true
;
this
.
initialized
=
true
;
}
}
switch
(
key
)
{
switch
(
key
)
{
...
@@ -369,7 +364,7 @@ class LocalTrainingService implements TrainingService {
...
@@ -369,7 +364,7 @@ class LocalTrainingService implements TrainingService {
private
getEnvironmentVariables
(
private
getEnvironmentVariables
(
trialJobDetail
:
TrialJobDetail
,
trialJobDetail
:
TrialJobDetail
,
resource
?
:
{
gpuIndices
:
number
[]
}):
{
key
:
string
;
value
:
string
}[]
{
resource
:
{
gpuIndices
:
number
[]
}):
{
key
:
string
;
value
:
string
}[]
{
const
envVariables
:
{
key
:
string
;
value
:
string
}[]
=
[
const
envVariables
:
{
key
:
string
;
value
:
string
}[]
=
[
{
key
:
'
NNI_PLATFORM
'
,
value
:
'
local
'
},
{
key
:
'
NNI_PLATFORM
'
,
value
:
'
local
'
},
{
key
:
'
NNI_SYS_DIR
'
,
value
:
trialJobDetail
.
workingDirectory
},
{
key
:
'
NNI_SYS_DIR
'
,
value
:
trialJobDetail
.
workingDirectory
},
...
@@ -379,12 +374,10 @@ class LocalTrainingService implements TrainingService {
...
@@ -379,12 +374,10 @@ class LocalTrainingService implements TrainingService {
{
key
:
'
MULTI_PHASE
'
,
value
:
this
.
isMultiPhase
.
toString
()
}
{
key
:
'
MULTI_PHASE
'
,
value
:
this
.
isMultiPhase
.
toString
()
}
];
];
if
(
resource
!==
undefined
&&
resource
.
gpuIndices
.
length
>
0
)
{
envVariables
.
push
({
envVariables
.
push
({
key
:
'
CUDA_VISIBLE_DEVICES
'
,
key
:
'
CUDA_VISIBLE_DEVICES
'
,
value
:
this
.
gpuScheduler
===
undefined
?
''
:
resource
.
gpuIndices
.
join
(
'
,
'
)
value
:
this
.
gpuScheduler
===
undefined
?
'
-1
'
:
resource
.
gpuIndices
.
join
(
'
,
'
)
});
});
}
return
envVariables
;
return
envVariables
;
}
}
...
@@ -467,36 +460,52 @@ class LocalTrainingService implements TrainingService {
...
@@ -467,36 +460,52 @@ class LocalTrainingService implements TrainingService {
}
}
}
}
/**
 * Build the script lines that run the trial command and record its outcome.
 * stderr of the command is redirected to `<workingDirectory>/stderr`; the exit
 * code followed by a timestamp is written to `<workingDirectory>/.nni/state`.
 * @param localTrailConfig trial configuration providing the command to run
 * @param workingDirectory working directory of the trial job
 * @returns script lines for the current platform
 */
private getScript(localTrailConfig: TrialConfig, workingDirectory: string): string[] {
    const stderrPath: string = path.join(workingDirectory, 'stderr');
    const statePath: string = path.join(workingDirectory, '.nni', 'state');

    if (process.platform === "win32") {
        return [
            `cmd /c ${localTrailConfig.command} 2>${stderrPath}`,
            `$NOW_DATE = [int64](([datetime]::UtcNow)-(get-date "1/1/1970")).TotalSeconds`,
            `$NOW_DATE = "$NOW_DATE" + "000"`,
            `Write $LASTEXITCODE " " $NOW_DATE | Out-File ${statePath} -NoNewline -encoding utf8`
        ];
    }

    return [
        `eval ${localTrailConfig.command} 2>${stderrPath}`,
        `echo $? \`date +%s000\` >${statePath}`
    ];
}
private
async
runTrialJob
(
trialJobId
:
string
,
resource
:
{
gpuIndices
:
number
[]}):
Promise
<
void
>
{
private
async
runTrialJob
(
trialJobId
:
string
,
resource
:
{
gpuIndices
:
number
[]}):
Promise
<
void
>
{
const
trialJobDetail
:
LocalTrialJobDetail
=
<
LocalTrialJobDetail
>
this
.
jobMap
.
get
(
trialJobId
);
const
trialJobDetail
:
LocalTrialJobDetail
=
<
LocalTrialJobDetail
>
this
.
jobMap
.
get
(
trialJobId
);
const
variables
:
{
key
:
string
;
value
:
string
}[]
=
this
.
getEnvironmentVariables
(
trialJobDetail
,
resource
);
const
variables
:
{
key
:
string
;
value
:
string
}[]
=
this
.
getEnvironmentVariables
(
trialJobDetail
,
resource
);
const
runScriptLines
:
string
[]
=
[];
if
(
!
this
.
localTrailConfig
)
{
if
(
!
this
.
localTrailConfig
)
{
throw
new
Error
(
'
trial config is not initialized
'
);
throw
new
Error
(
'
trial config is not initialized
'
);
}
}
runScriptLines
.
push
(
const
runScriptLines
:
string
[]
=
[];
'
#!/bin/bash
'
,
if
(
process
.
platform
!==
"
win32
"
){
`cd
${
this
.
localTrailConfig
.
codeDir
}
`
);
runScriptLines
.
push
(
'
#!/bin/bash
'
);
}
runScriptLines
.
push
(
`cd
${
this
.
localTrailConfig
.
codeDir
}
`
);
for
(
const
variable
of
variables
)
{
for
(
const
variable
of
variables
)
{
runScriptLines
.
push
(
`export
${
variable
.
key
}
=
${
variable
.
value
}
`
);
runScriptLines
.
push
(
setEnvironmentVariable
(
variable
)
);
}
}
runScriptLines
.
push
(
const
scripts
:
string
[]
=
this
.
getScript
(
this
.
localTrailConfig
,
trialJobDetail
.
workingDirectory
);
`eval
${
this
.
localTrailConfig
.
command
}
2>
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
stderr
'
)}
`
,
scripts
.
forEach
(
script
=>
{
`echo $?
\`
date +%s000
\`
>
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
,
'
state
'
)}
`
);
runScriptLines
.
push
(
script
);
});
await
cpp
.
exec
(
`m
kdir
-p
${
trialJobDetail
.
workingDirectory
}
`
);
await
exec
M
kdir
(
trialJobDetail
.
workingDirectory
);
await
cpp
.
exec
(
`m
kdir
-p
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
)
}
`
);
await
exec
M
kdir
(
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
));
await
cpp
.
exec
(
`touch
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
,
'
metrics
'
)
}
`
);
await
execNewFile
(
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
.nni
'
,
'
metrics
'
));
await
fs
.
promises
.
writeFile
(
const
scriptName
:
string
=
getScriptName
(
'
run
'
);
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
run.sh
'
),
runScriptLines
.
join
(
'
\n
'
),
{
encoding
:
'
utf8
'
,
mode
:
0o777
});
await
fs
.
promises
.
writeFile
(
path
.
join
(
trialJobDetail
.
workingDirectory
,
scriptName
),
runScriptLines
.
join
(
getNewLine
()
),
{
encoding
:
'
utf8
'
,
mode
:
0o777
});
await
this
.
writeParameterFile
(
trialJobDetail
.
workingDirectory
,
(
<
TrialJobApplicationForm
>
trialJobDetail
.
form
).
hyperParameters
);
await
this
.
writeParameterFile
(
trialJobDetail
.
workingDirectory
,
(
<
TrialJobApplicationForm
>
trialJobDetail
.
form
).
hyperParameters
);
const
process
:
cp
.
ChildProcess
=
cp
.
exec
(
`bash
${
path
.
join
(
trialJobDetail
.
workingDirectory
,
'
run.sh
'
)}
`
);
const
trialJobProcess
:
cp
.
ChildProcess
=
execScript
(
path
.
join
(
trialJobDetail
.
workingDirectory
,
scriptName
));
this
.
setTrialJobStatus
(
trialJobDetail
,
'
RUNNING
'
);
this
.
setTrialJobStatus
(
trialJobDetail
,
'
RUNNING
'
);
trialJobDetail
.
startTime
=
Date
.
now
();
trialJobDetail
.
startTime
=
Date
.
now
();
trialJobDetail
.
pid
=
p
rocess
.
pid
;
trialJobDetail
.
pid
=
trialJobP
rocess
.
pid
;
this
.
setExtraProperties
(
trialJobDetail
,
resource
);
this
.
setExtraProperties
(
trialJobDetail
,
resource
);
let
buffer
:
Buffer
=
Buffer
.
alloc
(
0
);
let
buffer
:
Buffer
=
Buffer
.
alloc
(
0
);
...
...
src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts
View file @
0663218b
...
@@ -46,7 +46,7 @@ import {
...
@@ -46,7 +46,7 @@ import {
RemoteMachineScheduleInfo
,
RemoteMachineScheduleResult
,
SSHClient
,
SSHClientManager
,
RemoteMachineScheduleInfo
,
RemoteMachineScheduleResult
,
SSHClient
,
SSHClientManager
,
RemoteMachineTrialJobDetail
,
ScheduleResultType
,
REMOTEMACHINE_TRIAL_COMMAND_FORMAT
RemoteMachineTrialJobDetail
,
ScheduleResultType
,
REMOTEMACHINE_TRIAL_COMMAND_FORMAT
}
from
'
./remoteMachineData
'
;
}
from
'
./remoteMachineData
'
;
import
{
GPU_INFO_COLLECTOR_FORMAT
}
from
'
../common/gpuData
'
;
import
{
GPU_INFO_COLLECTOR_FORMAT
_LINUX
}
from
'
../common/gpuData
'
;
import
{
SSHClientUtility
}
from
'
./sshClientUtility
'
;
import
{
SSHClientUtility
}
from
'
./sshClientUtility
'
;
import
{
validateCodeDir
}
from
'
../common/util
'
;
import
{
validateCodeDir
}
from
'
../common/util
'
;
import
{
RemoteMachineJobRestServer
}
from
'
./remoteMachineJobRestServer
'
;
import
{
RemoteMachineJobRestServer
}
from
'
./remoteMachineJobRestServer
'
;
...
@@ -452,7 +452,7 @@ class RemoteMachineTrainingService implements TrainingService {
...
@@ -452,7 +452,7 @@ class RemoteMachineTrainingService implements TrainingService {
let
gpuMetricsCollectorScriptPath
:
string
=
path
.
join
(
gpuMetricCollectorScriptFolder
,
userName
,
'
gpu_metrics_collector.sh
'
);
let
gpuMetricsCollectorScriptPath
:
string
=
path
.
join
(
gpuMetricCollectorScriptFolder
,
userName
,
'
gpu_metrics_collector.sh
'
);
const
remoteGPUScriptsDir
:
string
=
this
.
getRemoteScriptsPath
(
userName
);
// This directory is used to store gpu_metrics and pid created by script
const
remoteGPUScriptsDir
:
string
=
this
.
getRemoteScriptsPath
(
userName
);
// This directory is used to store gpu_metrics and pid created by script
const
gpuMetricsCollectorScriptContent
:
string
=
String
.
Format
(
const
gpuMetricsCollectorScriptContent
:
string
=
String
.
Format
(
GPU_INFO_COLLECTOR_FORMAT
,
GPU_INFO_COLLECTOR_FORMAT
_LINUX
,
remoteGPUScriptsDir
,
remoteGPUScriptsDir
,
path
.
join
(
remoteGPUScriptsDir
,
'
pid
'
),
path
.
join
(
remoteGPUScriptsDir
,
'
pid
'
),
);
);
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment