Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
nni
Commits
e29b58a1
"driver/device_direct_convolution_3.cuh" did not exist on "a5bcde36e3a53e6ee68ee48af96c7441f620f574"
Unverified
Commit
e29b58a1
authored
Apr 30, 2020
by
SparkSnail
Committed by
GitHub
Apr 30, 2020
Browse files
Merge pull request #244 from microsoft/master
merge master
parents
e0c2c0eb
4f88be1f
Changes
36
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
300 additions
and
288 deletions
+300
-288
src/nni_manager/training_service/remote_machine/sshClientUtility.ts
...nager/training_service/remote_machine/sshClientUtility.ts
+0
-154
src/nni_manager/training_service/remote_machine/test/linuxCommands.test.ts
...raining_service/remote_machine/test/linuxCommands.test.ts
+103
-0
src/nni_manager/training_service/remote_machine/test/shellExecutor.test.ts
...raining_service/remote_machine/test/shellExecutor.test.ts
+86
-0
src/nni_manager/training_service/test/sshClientUtility.test.ts
...ni_manager/training_service/test/sshClientUtility.test.ts
+0
-97
src/sdk/pynni/nni/nas/pytorch/base_mutator.py
src/sdk/pynni/nni/nas/pytorch/base_mutator.py
+4
-2
src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
+1
-1
src/sdk/pynni/nni/nas/pytorch/mutables.py
src/sdk/pynni/nni/nas/pytorch/mutables.py
+35
-10
src/sdk/pynni/nni/nas/pytorch/mutator.py
src/sdk/pynni/nni/nas/pytorch/mutator.py
+8
-6
src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py
src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py
+5
-3
src/webui/yarn.lock
src/webui/yarn.lock
+1
-1
test/config/integration_tests.yml
test/config/integration_tests.yml
+16
-0
test/nni_test/nnitest/validators.py
test/nni_test/nnitest/validators.py
+20
-0
test/pipelines/pipelines-it-pai-windows.yml
test/pipelines/pipelines-it-pai-windows.yml
+1
-1
test/pipelines/pipelines-it-pai.yml
test/pipelines/pipelines-it-pai.yml
+1
-1
tools/nni_cmd/nnictl_utils.py
tools/nni_cmd/nnictl_utils.py
+5
-4
tools/nni_gpu_tool/gpu_metrics_collector.py
tools/nni_gpu_tool/gpu_metrics_collector.py
+14
-8
No files found.
src/nni_manager/training_service/remote_machine/sshClientUtility.ts
deleted
100644 → 0
View file @
e0c2c0eb
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
*
as
assert
from
'
assert
'
;
import
*
as
os
from
'
os
'
;
import
*
as
path
from
'
path
'
;
import
{
Client
,
ClientChannel
,
SFTPWrapper
}
from
'
ssh2
'
;
import
*
as
stream
from
'
stream
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
NNIError
,
NNIErrorNames
}
from
'
../../common/errors
'
;
import
{
getLogger
,
Logger
}
from
'
../../common/log
'
;
import
{
getRemoteTmpDir
,
uniqueString
,
unixPathJoin
}
from
'
../../common/utils
'
;
import
{
execRemove
,
tarAdd
}
from
'
../common/util
'
;
import
{
RemoteCommandResult
}
from
'
./remoteMachineData
'
;
/**
 * Helpers for the operations NNI performs most often through an `ssh2` client:
 * uploading a file or a whole directory, running a command, and reading a
 * remote file.
 */
export namespace SSHClientUtility {
    /**
     * Wrap a value in single quotes for a POSIX shell so that spaces and shell
     * metacharacters in paths are passed through literally.
     * Embedded single quotes are emitted as `'\''`.
     * @param value the raw argument
     */
    function shellQuote(value: string): string {
        return `'${value.replace(/'/g, "'\\''")}'`;
    }

    /**
     * Copy local file to remote path
     * @param localFilePath the path of local file
     * @param remoteFilePath the target path in remote machine
     * @param sshClient SSH Client
     * @returns a promise resolved with `true` once the upload finished; rejected with the SFTP error otherwise
     */
    export function copyFileToRemote(localFilePath: string, remoteFilePath: string, sshClient: Client): Promise<boolean> {
        const log: Logger = getLogger();
        log.debug(`copyFileToRemote: localFilePath: ${localFilePath}, remoteFilePath: ${remoteFilePath}`);
        assert(sshClient !== undefined);
        const deferred: Deferred<boolean> = new Deferred<boolean>();
        sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
            if (err !== undefined && err !== null) {
                log.error(`copyFileToRemote: ${err.message}, ${localFilePath}, ${remoteFilePath}`);
                deferred.reject(err);

                return;
            }
            // Reject instead of asserting: an exception thrown inside this callback
            // would escape the promise and leave the caller waiting forever.
            if (sftp === undefined || sftp === null) {
                deferred.reject(new Error(`copyFileToRemote: no SFTP session available for ${remoteFilePath}`));

                return;
            }
            sftp.fastPut(localFilePath, remoteFilePath, (fastPutErr: Error) => {
                // The SFTP session has to be released whether or not the upload worked.
                sftp.end();
                if (fastPutErr !== undefined && fastPutErr !== null) {
                    log.error(`copyFileToRemote: ${fastPutErr.message}, ${localFilePath}, ${remoteFilePath}`);
                    deferred.reject(fastPutErr);
                } else {
                    deferred.resolve(true);
                }
            });
        });

        return deferred.promise;
    }

    /**
     * Execute command on remote machine
     * @param command the command to execute remotely
     * @param client SSH Client
     * @returns the collected stdout, stderr and exit code of the command.
     *          `exitCode` is whatever the server reported through the `exit`
     *          event; it stays unset if the server never reports one.
     */
    export function remoteExeCommand(command: string, client: Client): Promise<RemoteCommandResult> {
        const log: Logger = getLogger();
        log.debug(`remoteExeCommand: command: [${command}]`);
        const deferred: Deferred<RemoteCommandResult> = new Deferred<RemoteCommandResult>();
        let stdout: string = '';
        let stderr: string = '';
        let exitCode: number;

        client.exec(command, (err: Error, channel: ClientChannel) => {
            if (err !== undefined && err !== null) {
                log.error(`remoteExeCommand: ${err.message}`);
                deferred.reject(err);

                return;
            }

            channel.on('data', (data: any, dataStderr: any) => {
                // Legacy ssh2 releases flag stderr chunks through a second argument.
                if (dataStderr !== undefined && dataStderr !== null) {
                    stderr += data.toString();
                } else {
                    stdout += data.toString();
                }
            });
            // Current ssh2 releases deliver stderr on a dedicated stream instead.
            if (channel.stderr !== undefined && channel.stderr !== null) {
                channel.stderr.on('data', (data: any) => {
                    stderr += data.toString();
                });
            }
            channel.on('exit', (code: any) => {
                exitCode = <number>code;
            });
            // Resolve on 'close' rather than 'exit': 'exit' is optional and can be
            // emitted before the last output chunk has been delivered.
            channel.on('close', () => {
                deferred.resolve({
                    stdout: stdout,
                    stderr: stderr,
                    exitCode: exitCode
                });
            });
        });

        return deferred.promise;
    }

    /**
     * Copy files and directories in local directory recursively to remote directory
     *
     * The directory is packed into a temporary tarball, uploaded, unpacked on the
     * remote side, and both temporary archives are removed afterwards.
     * @param localDirectory local directory
     * @param remoteDirectory remote directory
     * @param sshClient SSH client
     * @param remoteOS operating system name of the remote machine, used to pick its temp directory
     */
    export async function copyDirectoryToRemote(localDirectory: string, remoteDirectory: string, sshClient: Client, remoteOS: string): Promise<void> {
        const log: Logger = getLogger();
        const tmpSuffix: string = uniqueString(5);
        const localTarPath: string = path.join(os.tmpdir(), `nni_tmp_local_${tmpSuffix}.tar.gz`);
        const remoteTarPath: string = unixPathJoin(getRemoteTmpDir(remoteOS), `nni_tmp_remote_${tmpSuffix}.tar.gz`);

        try {
            // Compress files in local directory to a temporary archive
            await tarAdd(localTarPath, localDirectory);
            // Copy the compressed file to the remote temp directory
            await copyFileToRemote(localTarPath, remoteTarPath, sshClient);
        } catch (error) {
            // Do not leave the local archive behind when packing or uploading failed,
            // and do not let a cleanup problem hide the original error.
            try {
                await execRemove(localTarPath);
            } catch (cleanupError) {
                log.error(`copyDirectoryToRemote: failed to remove ${localTarPath}: ${cleanupError}`);
            }
            throw error;
        }
        await execRemove(localTarPath);

        // Decompress the remote compressed file and delete it. Paths are quoted so
        // that spaces or shell metacharacters cannot alter the command.
        const extractResult: RemoteCommandResult = await remoteExeCommand(
            `tar -oxzf ${shellQuote(remoteTarPath)} -C ${shellQuote(remoteDirectory)}`, sshClient);
        if (typeof extractResult.exitCode === 'number' && extractResult.exitCode !== 0) {
            log.error(`copyDirectoryToRemote: extracting ${remoteTarPath} to ${remoteDirectory} ` +
                `exited with code ${extractResult.exitCode}: ${extractResult.stderr}`);
        }
        await remoteExeCommand(`rm ${shellQuote(remoteTarPath)}`, sshClient);
    }

    /**
     * Read the whole content of a remote file over SFTP.
     * @param filePath path of the file on the remote machine
     * @param sshClient SSH client
     * @returns the file content decoded as UTF-8; the promise is rejected with an
     *          NNIError (NOT_FOUND) when the read stream reports an error
     */
    export function getRemoteFileContent(filePath: string, sshClient: Client): Promise<string> {
        const deferred: Deferred<string> = new Deferred<string>();
        sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
            if (err !== undefined && err !== null) {
                getLogger().error(`getRemoteFileContent: ${err.message}`);
                deferred.reject(new Error(`SFTP error: ${err.message}`));

                return;
            }
            try {
                const sftpStream: stream.Readable = sftp.createReadStream(filePath);

                // Keep the raw chunks and decode once at the end, so a multi-byte
                // character split across two chunks is not corrupted.
                const chunks: Buffer[] = [];
                sftpStream.on('data', (data: Buffer | string) => {
                    chunks.push(typeof data === 'string' ? Buffer.from(data) : data);
                }).on('error', (streamErr: Error) => {
                    sftp.end();
                    deferred.reject(new NNIError(NNIErrorNames.NOT_FOUND, streamErr.message));
                }).on('end', () => {
                    // sftp connection need to be released manually once operation is done
                    sftp.end();
                    deferred.resolve(Buffer.concat(chunks).toString());
                });
            } catch (error) {
                const message: string = error instanceof Error ? error.message : String(error);
                getLogger().error(`getRemoteFileContent: ${message}`);
                sftp.end();
                deferred.reject(new Error(`SFTP error: ${message}`));
            }
        });

        return deferred.promise;
    }
}
src/nni_manager/training_service/remote_machine/test/linuxCommands.test.ts
0 → 100644
View file @
e29b58a1
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
*
as
chai
from
'
chai
'
;
import
*
as
chaiAsPromised
from
'
chai-as-promised
'
;
import
*
as
component
from
'
../../../common/component
'
;
import
{
cleanupUnitTest
,
prepareUnitTest
}
from
'
../../../common/utils
'
;
import
{
LinuxCommands
}
from
'
../extends/linuxCommands
'
;
// import { TrialConfigMetadataKey } from '../trialConfigMetadataKey';
/**
 * Mocha suite checking the command strings produced by LinuxCommands.
 * Every case compares a builder's output with the exact expected shell command.
 */
describe('Unit Test for linuxCommands', () => {
    let linuxCommands: LinuxCommands;

    // Shorthand for a strict-equality expectation.
    const check = (actual: unknown, expected: unknown): void => {
        chai.expect(actual).to.equal(expected);
    };

    before(() => {
        chai.should();
        chai.use(chaiAsPromised);
        prepareUnitTest();
    });

    after(() => {
        cleanupUnitTest();
    });

    // A fresh LinuxCommands is resolved from the component container for each case.
    beforeEach(() => {
        linuxCommands = component.get(LinuxCommands);
    });

    afterEach(() => {
    });

    it('joinPath', async () => {
        check(linuxCommands.joinPath("/root/", "/first"), "/root/first");
        check(linuxCommands.joinPath("/root", "first"), "/root/first");
        check(linuxCommands.joinPath("/root/", "first"), "/root/first");
        check(linuxCommands.joinPath("root/", "first"), "root/first");
        check(linuxCommands.joinPath("root/"), "root/");
        check(linuxCommands.joinPath("root"), "root");
        check(linuxCommands.joinPath("./root"), "./root");
        check(linuxCommands.joinPath(""), ".");
        check(linuxCommands.joinPath(".."), "..");
    });

    it('createFolder', async () => {
        check(linuxCommands.createFolder("test"), "mkdir -p 'test'");
        check(linuxCommands.createFolder("test", true), "umask 0; mkdir -p 'test'");
    });

    it('allowPermission', async () => {
        check(linuxCommands.allowPermission(true, "test", "test1"), "chmod 777 -R 'test' 'test1'");
        check(linuxCommands.allowPermission(false, "test"), "chmod 777 'test'");
    });

    it('removeFolder', async () => {
        check(linuxCommands.removeFolder("test"), "rm -df 'test'");
        check(linuxCommands.removeFolder("test", true), "rm -rf 'test'");
        check(linuxCommands.removeFolder("test", true, false), "rm -r 'test'");
        check(linuxCommands.removeFolder("test", false, false), "rm 'test'");
    });

    it('removeFiles', async () => {
        check(linuxCommands.removeFiles("test", "*.sh"), "rm 'test/*.sh'");
        check(linuxCommands.removeFiles("test", ""), "rm 'test'");
    });

    it('readLastLines', async () => {
        check(linuxCommands.readLastLines("test", 3), "tail -n 3 'test'");
    });

    it('isProcessAlive', async () => {
        check(linuxCommands.isProcessAliveCommand("test"), "kill -0 `cat 'test'`");
        check(linuxCommands.isProcessAliveProcessOutput({
            exitCode: 0,
            stdout: "",
            stderr: ""
        }), true);
        check(linuxCommands.isProcessAliveProcessOutput({
            exitCode: 10,
            stdout: "",
            stderr: ""
        }), false);
    });

    it('killChildProcesses', async () => {
        check(linuxCommands.killChildProcesses("test"), "pkill -P `cat 'test'`");
    });

    it('extractFile', async () => {
        check(linuxCommands.extractFile("test.tar", "testfolder"), "tar -oxzf 'test.tar' -C 'testfolder'");
    });

    it('executeScript', async () => {
        check(linuxCommands.executeScript("test.sh", true), "bash 'test.sh'");
        check(linuxCommands.executeScript("test script'\"", false), `bash -c \"test script'\\""`);
    });
});
src/nni_manager/training_service/remote_machine/test/shellExecutor.test.ts
0 → 100644
View file @
e29b58a1
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
fs
from
'
fs
'
;
import
*
as
chai
from
'
chai
'
;
import
*
as
chaiAsPromised
from
'
chai-as-promised
'
;
import
{
Client
}
from
'
ssh2
'
;
import
{
ShellExecutor
}
from
'
../shellExecutor
'
;
import
{
prepareUnitTest
,
cleanupUnitTest
}
from
'
../../../common/utils
'
;
// Scratch locations used by the ShellExecutor tests below:
// the local fixture file, its upload target, and a folder created and removed remotely.
const LOCALFILE = '/tmp/localSshclientUTData';
const REMOTEFILE = '/tmp/remoteSshclientUTData';
const REMOTEFOLDER = '/tmp/remoteSshclientUTFolder';
/**
 * Upload the local fixture file to its remote location once.
 * @param executor executor used to perform the upload
 */
async function copyFile(executor: ShellExecutor): Promise<void> {
    const source = LOCALFILE;
    const target = REMOTEFILE;
    await executor.copyFileToRemote(source, target);
}
/**
 * Upload the local fixture file ten times, one upload after another.
 * @param executor executor used to perform the uploads
 */
async function copyFileToRemoteLoop(executor: ShellExecutor): Promise<void> {
    let remaining = 10;
    while (remaining > 0) {
        await executor.copyFileToRemote(LOCALFILE, REMOTEFILE);
        remaining -= 1;
    }
}
/**
 * Read the remote fixture file ten times, one read after another.
 * @param executor executor used to perform the reads
 */
async function getRemoteFileContentLoop(executor: ShellExecutor): Promise<void> {
    let remaining = 10;
    while (remaining > 0) {
        await executor.getRemoteFileContent(REMOTEFILE);
        remaining -= 1;
    }
}
/**
 * Integration tests for ShellExecutor against a real remote machine.
 *
 * The connection settings are read from `../../.vscode/rminfo.json`; when that
 * file is missing or unreadable every test returns immediately.
 */
describe('ShellExecutor test', () => {
    let skip: boolean = false;
    let rmMeta: any;
    try {
        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
        // Do not print rmMeta itself: it is handed to the executor to open the SSH
        // connection, so it presumably contains credentials that must not end up in test logs.
        console.log('Loaded remote machine configuration from rminfo.json.');
    } catch (err) {
        console.log(`Please configure rminfo.json to enable remote machine test. ${err}`);
        skip = true;
    }

    before(async () => {
        chai.should();
        chai.use(chaiAsPromised);
        // Create the local fixture file that the tests upload.
        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
        prepareUnitTest();
    });

    after(() => {
        cleanupUnitTest();
        // The fixture may be missing if `before` failed; unlinking blindly would
        // throw and hide the real failure.
        if (fs.existsSync(LOCALFILE)) {
            fs.unlinkSync(LOCALFILE);
        }
    });

    it('Test mkdir', async () => {
        if (skip) {
            return;
        }
        const shellExecutor: ShellExecutor = new ShellExecutor();
        await shellExecutor.initialize(rmMeta);
        let result = await shellExecutor.createFolder(REMOTEFOLDER, false);
        chai.expect(result).eq(true);
        result = await shellExecutor.removeFolder(REMOTEFOLDER);
        chai.expect(result).eq(true);
    });

    it('Test ShellExecutor', async () => {
        if (skip) {
            return;
        }
        const shellExecutor: ShellExecutor = new ShellExecutor();
        await shellExecutor.initialize(rmMeta);
        // One upload first so the remote file exists, then uploads and reads
        // run concurrently on the same executor.
        await copyFile(shellExecutor);
        await Promise.all([
            copyFileToRemoteLoop(shellExecutor),
            copyFileToRemoteLoop(shellExecutor),
            copyFileToRemoteLoop(shellExecutor),
            getRemoteFileContentLoop(shellExecutor)
        ]);
    });
});
src/nni_manager/training_service/test/sshClientUtility.test.ts
deleted
100644 → 0
View file @
e0c2c0eb
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'
use strict
'
;
import
*
as
cpp
from
'
child-process-promise
'
;
import
*
as
fs
from
'
fs
'
;
import
{
Client
}
from
'
ssh2
'
;
import
{
Deferred
}
from
'
ts-deferred
'
;
import
{
SSHClientUtility
}
from
'
../remote_machine/sshClientUtility
'
;
// Fixture file used by the SSHClientUtility test: the same path is used for the
// local source and for the remote upload target.
const LOCALFILE = '/tmp/sshclientUTData';
const REMOTEFILE = '/tmp/sshclientUTData';
/**
 * Upload the fixture file once through a raw SFTP session on the given connection.
 * @param conn connected SSH client
 * @returns a promise that settles when the upload finished or failed
 */
async function copyFile(conn: Client): Promise<void> {
    return new Promise<void>((resolve, reject) => {
        conn.sftp((err, sftp) => {
            if (err) {
                reject(err);

                return;
            }
            sftp.fastPut(LOCALFILE, REMOTEFILE, (fastPutErr: Error) => {
                // Release the SFTP session before reporting the outcome.
                sftp.end();
                if (!fastPutErr) {
                    resolve();

                    return;
                }
                reject(fastPutErr);
            });
        });
    });
}
/**
 * Upload the fixture file 500 times in sequence, logging the iteration index before each upload.
 * @param conn connected SSH client
 */
async function copyFileToRemoteLoop(conn: Client): Promise<void> {
    let round = 0;
    while (round < 500) {
        console.log(round);
        await SSHClientUtility.copyFileToRemote(LOCALFILE, REMOTEFILE, conn);
        round += 1;
    }
}
/**
 * Run `ls` remotely 500 times in sequence, logging the iteration index before each run.
 * @param conn connected SSH client
 */
async function remoteExeCommandLoop(conn: Client): Promise<void> {
    let round = 0;
    while (round < 500) {
        console.log(round);
        await SSHClientUtility.remoteExeCommand('ls', conn);
        round += 1;
    }
}
/**
 * Read the remote fixture file 500 times in sequence, logging the iteration index before each read.
 * @param conn connected SSH client
 */
async function getRemoteFileContentLoop(conn: Client): Promise<void> {
    let round = 0;
    while (round < 500) {
        console.log(round);
        await SSHClientUtility.getRemoteFileContent(REMOTEFILE, conn);
        round += 1;
    }
}
/**
 * Stress test for SSHClientUtility against a real remote machine whose
 * connection settings are read from `../../.vscode/rminfo.json`.
 */
describe('sshClientUtility test', () => {
    // NOTE(review): the flag starts as `true`, so the test body below never runs even
    // when rminfo.json is present. This looks like a deliberate way to disable a slow
    // test (5 x 500 remote operations) - confirm before flipping it to `false`.
    let skip: boolean = true;
    let rmMeta: any;
    try {
        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
    } catch (err) {
        skip = true;
    }

    before(async () => {
        // Create the local fixture file that the test uploads.
        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
    });

    after(() => {
        fs.unlinkSync(LOCALFILE);
    });

    it('Test SSHClientUtility', (done) => {
        if (skip) {
            done();

            return;
        }
        const conn: Client = new Client();

        // Report the outcome exactly once and always close the connection, so a
        // failure surfaces as a test error instead of a timeout or a leaked socket.
        let finished: boolean = false;
        const finish = (err?: Error): void => {
            if (finished) {
                return;
            }
            finished = true;
            conn.end();
            done(err);
        };

        conn.on('ready', async () => {
            try {
                // One upload first so the remote file exists, then all helpers
                // run concurrently on the same connection.
                await copyFile(conn);
                await Promise.all([
                    copyFileToRemoteLoop(conn),
                    copyFileToRemoteLoop(conn),
                    copyFileToRemoteLoop(conn),
                    remoteExeCommandLoop(conn),
                    getRemoteFileContentLoop(conn)
                ]);
                finish();
            } catch (err) {
                finish(err instanceof Error ? err : new Error(String(err)));
            }
        }).on('error', (err: Error) => {
            // Connection-level failures (for example refused or failed authentication).
            finish(err);
        }).connect(rmMeta);
    });
});
src/sdk/pynni/nni/nas/pytorch/base_mutator.py
View file @
e29b58a1
...
@@ -104,7 +104,7 @@ class BaseMutator(nn.Module):
...
@@ -104,7 +104,7 @@ class BaseMutator(nn.Module):
"""
"""
pass
pass
def
on_forward_layer_choice
(
self
,
mutable
,
*
input
s
):
def
on_forward_layer_choice
(
self
,
mutable
,
*
args
,
**
kwarg
s
):
"""
"""
Callbacks of forward in LayerChoice.
Callbacks of forward in LayerChoice.
...
@@ -112,8 +112,10 @@ class BaseMutator(nn.Module):
...
@@ -112,8 +112,10 @@ class BaseMutator(nn.Module):
----------
----------
mutable : LayerChoice
mutable : LayerChoice
Module whose forward is called.
Module whose forward is called.
input
s : list of torch.Tensor
arg
s : list of torch.Tensor
The arguments of its forward function.
The arguments of its forward function.
kwargs : dict
The keyword arguments of its forward function.
Returns
Returns
-------
-------
...
...
src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py
View file @
e29b58a1
...
@@ -203,7 +203,7 @@ class ClassicMutator(Mutator):
...
@@ -203,7 +203,7 @@ class ClassicMutator(Mutator):
# for now we only generate flattened search space
# for now we only generate flattened search space
if
isinstance
(
mutable
,
LayerChoice
):
if
isinstance
(
mutable
,
LayerChoice
):
key
=
mutable
.
key
key
=
mutable
.
key
val
=
[
repr
(
choice
)
for
choice
in
mutable
.
choic
es
]
val
=
mutable
.
nam
es
search_space
[
key
]
=
{
"_type"
:
LAYER_CHOICE
,
"_value"
:
val
}
search_space
[
key
]
=
{
"_type"
:
LAYER_CHOICE
,
"_value"
:
val
}
elif
isinstance
(
mutable
,
InputChoice
):
elif
isinstance
(
mutable
,
InputChoice
):
key
=
mutable
.
key
key
=
mutable
.
key
...
...
src/sdk/pynni/nni/nas/pytorch/mutables.py
View file @
e29b58a1
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
# Licensed under the MIT license.
# Licensed under the MIT license.
import
logging
import
logging
from
collections
import
OrderedDict
import
torch.nn
as
nn
import
torch.nn
as
nn
...
@@ -58,9 +59,6 @@ class Mutable(nn.Module):
...
@@ -58,9 +59,6 @@ class Mutable(nn.Module):
"Or did you apply multiple fixed architectures?"
)
"Or did you apply multiple fixed architectures?"
)
self
.
__dict__
[
"mutator"
]
=
mutator
self
.
__dict__
[
"mutator"
]
=
mutator
def
forward
(
self
,
*
inputs
):
raise
NotImplementedError
@
property
@
property
def
key
(
self
):
def
key
(
self
):
"""
"""
...
@@ -86,9 +84,6 @@ class Mutable(nn.Module):
...
@@ -86,9 +84,6 @@ class Mutable(nn.Module):
"Or did you initialize a mutable on the fly in forward pass? Move to `__init__` "
"Or did you initialize a mutable on the fly in forward pass? Move to `__init__` "
"so that trainer can locate all your mutables. See NNI docs for more details."
.
format
(
self
))
"so that trainer can locate all your mutables. See NNI docs for more details."
.
format
(
self
))
def
__repr__
(
self
):
return
"{} ({})"
.
format
(
self
.
name
,
self
.
key
)
class
MutableScope
(
Mutable
):
class
MutableScope
(
Mutable
):
"""
"""
...
@@ -131,7 +126,7 @@ class LayerChoice(Mutable):
...
@@ -131,7 +126,7 @@ class LayerChoice(Mutable):
Parameters
Parameters
----------
----------
op_candidates : list of nn.Module
op_candidates : list of nn.Module
or OrderedDict
A module list to be selected from.
A module list to be selected from.
reduction : str
reduction : str
``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected.
``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected.
...
@@ -146,23 +141,53 @@ class LayerChoice(Mutable):
...
@@ -146,23 +141,53 @@ class LayerChoice(Mutable):
----------
----------
length : int
length : int
Number of ops to choose from.
Number of ops to choose from.
names: list of str
Names of candidates.
Notes
-----
``op_candidates`` can be a list of modules or a ordered dict of named modules, for example,
.. code-block:: python
self.op_choice = LayerChoice(OrderedDict([
("conv3x3", nn.Conv2d(3, 16, 128)),
("conv5x5", nn.Conv2d(5, 16, 128)),
("conv7x7", nn.Conv2d(7, 16, 128))
]))
"""
"""
def
__init__
(
self
,
op_candidates
,
reduction
=
"sum"
,
return_mask
=
False
,
key
=
None
):
def
__init__
(
self
,
op_candidates
,
reduction
=
"sum"
,
return_mask
=
False
,
key
=
None
):
super
().
__init__
(
key
=
key
)
super
().
__init__
(
key
=
key
)
self
.
length
=
len
(
op_candidates
)
self
.
length
=
len
(
op_candidates
)
self
.
choices
=
nn
.
ModuleList
(
op_candidates
)
self
.
choices
=
[]
self
.
names
=
[]
if
isinstance
(
op_candidates
,
OrderedDict
):
for
name
,
module
in
op_candidates
.
items
():
assert
name
not
in
[
"length"
,
"reduction"
,
"return_mask"
,
"_key"
,
"key"
,
"names"
],
\
"Please don't use a reserved name '{}' for your module."
.
format
(
name
)
self
.
add_module
(
name
,
module
)
self
.
choices
.
append
(
module
)
self
.
names
.
append
(
name
)
elif
isinstance
(
op_candidates
,
list
):
for
i
,
module
in
enumerate
(
op_candidates
):
self
.
add_module
(
str
(
i
),
module
)
self
.
choices
.
append
(
module
)
self
.
names
.
append
(
str
(
i
))
else
:
raise
TypeError
(
"Unsupported op_candidates type: {}"
.
format
(
type
(
op_candidates
)))
self
.
reduction
=
reduction
self
.
reduction
=
reduction
self
.
return_mask
=
return_mask
self
.
return_mask
=
return_mask
def
forward
(
self
,
*
input
s
):
def
forward
(
self
,
*
args
,
**
kwarg
s
):
"""
"""
Returns
Returns
-------
-------
tuple of tensors
tuple of tensors
Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
"""
"""
out
,
mask
=
self
.
mutator
.
on_forward_layer_choice
(
self
,
*
input
s
)
out
,
mask
=
self
.
mutator
.
on_forward_layer_choice
(
self
,
*
args
,
**
kwarg
s
)
if
self
.
return_mask
:
if
self
.
return_mask
:
return
out
,
mask
return
out
,
mask
return
out
return
out
...
...
src/sdk/pynni/nni/nas/pytorch/mutator.py
View file @
e29b58a1
...
@@ -128,7 +128,7 @@ class Mutator(BaseMutator):
...
@@ -128,7 +128,7 @@ class Mutator(BaseMutator):
result
[
"mutable"
][
mutable
.
key
].
append
(
path
)
result
[
"mutable"
][
mutable
.
key
].
append
(
path
)
return
result
return
result
def
on_forward_layer_choice
(
self
,
mutable
,
*
input
s
):
def
on_forward_layer_choice
(
self
,
mutable
,
*
args
,
**
kwarg
s
):
"""
"""
On default, this method retrieves the decision obtained previously, and select certain operations.
On default, this method retrieves the decision obtained previously, and select certain operations.
Only operations with non-zero weight will be executed. The results will be added to a list.
Only operations with non-zero weight will be executed. The results will be added to a list.
...
@@ -138,7 +138,9 @@ class Mutator(BaseMutator):
...
@@ -138,7 +138,9 @@ class Mutator(BaseMutator):
----------
----------
mutable : LayerChoice
mutable : LayerChoice
Layer choice module.
Layer choice module.
inputs : list of torch.Tensor
args : list of torch.Tensor
Inputs
kwargs : dict
Inputs
Inputs
Returns
Returns
...
@@ -148,16 +150,16 @@ class Mutator(BaseMutator):
...
@@ -148,16 +150,16 @@ class Mutator(BaseMutator):
"""
"""
if
self
.
_connect_all
:
if
self
.
_connect_all
:
return
self
.
_all_connect_tensor_reduction
(
mutable
.
reduction
,
return
self
.
_all_connect_tensor_reduction
(
mutable
.
reduction
,
[
op
(
*
input
s
)
for
op
in
mutable
.
choices
]),
\
[
op
(
*
args
,
**
kwarg
s
)
for
op
in
mutable
.
choices
]),
\
torch
.
ones
(
mutable
.
length
)
torch
.
ones
(
mutable
.
length
)
def
_map_fn
(
op
,
*
input
s
):
def
_map_fn
(
op
,
args
,
kwarg
s
):
return
op
(
*
input
s
)
return
op
(
*
args
,
**
kwarg
s
)
mask
=
self
.
_get_decision
(
mutable
)
mask
=
self
.
_get_decision
(
mutable
)
assert
len
(
mask
)
==
len
(
mutable
.
choices
),
\
assert
len
(
mask
)
==
len
(
mutable
.
choices
),
\
"Invalid mask, expected {} to be of length {}."
.
format
(
mask
,
len
(
mutable
.
choices
))
"Invalid mask, expected {} to be of length {}."
.
format
(
mask
,
len
(
mutable
.
choices
))
out
=
self
.
_select_with_mask
(
_map_fn
,
[(
choice
,
*
input
s
)
for
choice
in
mutable
.
choices
],
mask
)
out
=
self
.
_select_with_mask
(
_map_fn
,
[(
choice
,
args
,
kwarg
s
)
for
choice
in
mutable
.
choices
],
mask
)
return
self
.
_tensor_reduction
(
mutable
.
reduction
,
out
),
mask
return
self
.
_tensor_reduction
(
mutable
.
reduction
,
out
),
mask
def
on_forward_input_choice
(
self
,
mutable
,
tensor_list
):
def
on_forward_input_choice
(
self
,
mutable
,
tensor_list
):
...
...
src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py
View file @
e29b58a1
...
@@ -317,7 +317,7 @@ class ProxylessNasMutator(BaseMutator):
...
@@ -317,7 +317,7 @@ class ProxylessNasMutator(BaseMutator):
self
.
mutable_list
.
append
(
mutable
)
self
.
mutable_list
.
append
(
mutable
)
mutable
.
registered_module
=
MixedOp
(
mutable
)
mutable
.
registered_module
=
MixedOp
(
mutable
)
def
on_forward_layer_choice
(
self
,
mutable
,
*
input
s
):
def
on_forward_layer_choice
(
self
,
mutable
,
*
args
,
**
kwarg
s
):
"""
"""
Callback of layer choice forward. This function defines the forward
Callback of layer choice forward. This function defines the forward
logic of the input mutable. So mutable is only interface, its real
logic of the input mutable. So mutable is only interface, its real
...
@@ -327,7 +327,9 @@ class ProxylessNasMutator(BaseMutator):
...
@@ -327,7 +327,9 @@ class ProxylessNasMutator(BaseMutator):
----------
----------
mutable: LayerChoice
mutable: LayerChoice
forward logic of this input mutable
forward logic of this input mutable
inputs: list of torch.Tensor
args: list of torch.Tensor
inputs of this mutable
kwargs: dict
inputs of this mutable
inputs of this mutable
Returns
Returns
...
@@ -339,7 +341,7 @@ class ProxylessNasMutator(BaseMutator):
...
@@ -339,7 +341,7 @@ class ProxylessNasMutator(BaseMutator):
"""
"""
# FIXME: return mask, to be consistent with other algorithms
# FIXME: return mask, to be consistent with other algorithms
idx
=
mutable
.
registered_module
.
active_op_index
idx
=
mutable
.
registered_module
.
active_op_index
return
mutable
.
registered_module
(
mutable
,
*
input
s
),
idx
return
mutable
.
registered_module
(
mutable
,
*
args
,
**
kwarg
s
),
idx
def
reset_binary_gates
(
self
):
def
reset_binary_gates
(
self
):
"""
"""
...
...
src/webui/yarn.lock
View file @
e29b58a1
...
@@ -5593,7 +5593,7 @@ load-json-file@^4.0.0:
...
@@ -5593,7 +5593,7 @@ load-json-file@^4.0.0:
pify "^3.0.0"
pify "^3.0.0"
strip-bom "^3.0.0"
strip-bom "^3.0.0"
loader-fs-cache@>=1.0.3,
loader-fs-cache@^1.0.0:
loader-fs-cache@^1.0.0:
version "1.0.3"
version "1.0.3"
resolved "https://registry.yarnpkg.com/loader-fs-cache/-/loader-fs-cache-1.0.3.tgz#f08657646d607078be2f0a032f8bd69dd6f277d9"
resolved "https://registry.yarnpkg.com/loader-fs-cache/-/loader-fs-cache-1.0.3.tgz#f08657646d607078be2f0a032f8bd69dd6f277d9"
integrity sha512-ldcgZpjNJj71n+2Mf6yetz+c9bM4xpKtNds4LbqXzU/PTdeAX0g3ytnU1AJMEcTk2Lex4Smpe3Q/eCTsvUBxbA==
integrity sha512-ldcgZpjNJj71n+2Mf6yetz+c9bM4xpKtNds4LbqXzU/PTdeAX0g3ytnU1AJMEcTk2Lex4Smpe3Q/eCTsvUBxbA==
...
...
test/config/integration_tests.yml
View file @
e29b58a1
...
@@ -77,6 +77,14 @@ testCases:
...
@@ -77,6 +77,14 @@ testCases:
kwargs
:
kwargs
:
expected_result_file
:
expected_metrics.json
expected_result_file
:
expected_metrics.json
-
name
:
export-float
configFile
:
test/config/metrics_test/config.yml
config
:
maxTrialNum
:
1
trialConcurrency
:
1
validator
:
class
:
ExportValidator
-
name
:
metrics-dict
-
name
:
metrics-dict
configFile
:
test/config/metrics_test/config_dict_metrics.yml
configFile
:
test/config/metrics_test/config_dict_metrics.yml
config
:
config
:
...
@@ -87,6 +95,14 @@ testCases:
...
@@ -87,6 +95,14 @@ testCases:
kwargs
:
kwargs
:
expected_result_file
:
expected_metrics_dict.json
expected_result_file
:
expected_metrics_dict.json
-
name
:
export-dict
configFile
:
test/config/metrics_test/config_dict_metrics.yml
config
:
maxTrialNum
:
1
trialConcurrency
:
1
validator
:
class
:
ExportValidator
-
name
:
nnicli
-
name
:
nnicli
configFile
:
test/config/examples/sklearn-regression.yml
configFile
:
test/config/examples/sklearn-regression.yml
config
:
config
:
...
...
test/nni_test/nnitest/validators.py
View file @
e29b58a1
...
@@ -2,6 +2,8 @@
...
@@ -2,6 +2,8 @@
# Licensed under the MIT license.
# Licensed under the MIT license.
import
os.path
as
osp
import
os.path
as
osp
from
os
import
remove
import
subprocess
import
json
import
json
import
requests
import
requests
import
nnicli
as
nc
import
nnicli
as
nc
...
@@ -12,6 +14,24 @@ class ITValidator:
...
@@ -12,6 +14,24 @@ class ITValidator:
def
__call__
(
self
,
rest_endpoint
,
experiment_dir
,
nni_source_dir
,
**
kwargs
):
def
__call__
(
self
,
rest_endpoint
,
experiment_dir
,
nni_source_dir
,
**
kwargs
):
pass
pass
class ExportValidator(ITValidator):
    """Validator checking that ``nnictl experiment export`` works for CSV and JSON.

    For each format the experiment is exported to a report file in the current
    working directory, the file content is printed, and the file is removed.
    """

    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
        # The experiment id is the last component of the experiment directory;
        # normpath drops a trailing separator that would otherwise yield ''.
        exp_id = osp.basename(osp.normpath(experiment_dir))
        self._export_and_print(exp_id, 'csv', 'report.csv', 'Exported CSV file: \n')
        self._export_and_print(exp_id, 'json', 'report.json', 'Exported JSON file: \n')

    @staticmethod
    def _export_and_print(exp_id, export_type, report_file, banner):
        """Export one report with ``nnictl``, print it, and always clean it up.

        Raises
        ------
        AssertionError
            If ``nnictl`` exits with a non-zero return code.
        """
        try:
            proc = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", export_type, "-f", report_file])
            assert proc.returncode == 0, \
                '`nnictl experiment export -t %s` failed with code %d' % (export_type, proc.returncode)
            with open(report_file, 'r') as f:
                print(banner)
                # Lines already end with their own newline, so print the content as-is.
                print(f.read())
                print('\n\n')
        finally:
            # Remove the report even when the export or the read failed, but only
            # if it exists, so cleanup cannot hide the original error.
            if osp.exists(report_file):
                remove(report_file)
class
MetricsValidator
(
ITValidator
):
class
MetricsValidator
(
ITValidator
):
def
__call__
(
self
,
rest_endpoint
,
experiment_dir
,
nni_source_dir
,
**
kwargs
):
def
__call__
(
self
,
rest_endpoint
,
experiment_dir
,
nni_source_dir
,
**
kwargs
):
...
...
test/pipelines/pipelines-it-pai-windows.yml
View file @
e29b58a1
...
@@ -70,5 +70,5 @@ jobs:
...
@@ -70,5 +70,5 @@ jobs:
python --version
python --version
mount -o anon $(pai_nfs_uri) $(local_nfs_uri)
mount -o anon $(pai_nfs_uri) $(local_nfs_uri)
python nni_test/nnitest/generate_ts_config.py --ts pai --pai_token $(pai_token) --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $(docker_image) --pai_storage_plugin $(pai_storage_plugin) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
python nni_test/nnitest/generate_ts_config.py --ts pai --pai_token $(pai_token) --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $(docker_image) --pai_storage_plugin $(pai_storage_plugin) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
--exclude multi-phase
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
displayName
:
'
Examples
and
advanced
features
tests
on
pai'
displayName
:
'
Examples
and
advanced
features
tests
on
pai'
\ No newline at end of file
test/pipelines/pipelines-it-pai.yml
View file @
e29b58a1
...
@@ -57,5 +57,5 @@ jobs:
...
@@ -57,5 +57,5 @@ jobs:
cd test
cd test
python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
--pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
--pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
--exclude multi-phase
PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
displayName
:
'
integration
test'
displayName
:
'
integration
test'
tools/nni_cmd/nnictl_utils.py
View file @
e29b58a1
...
@@ -699,12 +699,13 @@ def export_trials_data(args):
...
@@ -699,12 +699,13 @@ def export_trials_data(args):
content
=
json
.
loads
(
response
.
text
)
content
=
json
.
loads
(
response
.
text
)
trial_records
=
[]
trial_records
=
[]
for
record
in
content
:
for
record
in
content
:
if
not
isinstance
(
record
[
'value'
],
(
float
,
int
)):
record_value
=
json
.
loads
(
record
[
'value'
])
formated_record
=
{
**
record
[
'parameter'
],
**
record
[
'value'
],
**
{
'id'
:
record
[
'id'
]}}
if
not
isinstance
(
record_value
,
(
float
,
int
)):
formated_record
=
{
**
record
[
'parameter'
],
**
record_value
,
**
{
'id'
:
record
[
'id'
]}}
else
:
else
:
formated_record
=
{
**
record
[
'parameter'
],
**
{
'reward'
:
record
[
'
value
'
]
,
'id'
:
record
[
'id'
]}}
formated_record
=
{
**
record
[
'parameter'
],
**
{
'reward'
:
record
_
value
,
'id'
:
record
[
'id'
]}}
trial_records
.
append
(
formated_record
)
trial_records
.
append
(
formated_record
)
with
open
(
args
.
path
,
'w'
)
as
file
:
with
open
(
args
.
path
,
'w'
,
newline
=
''
)
as
file
:
writer
=
csv
.
DictWriter
(
file
,
set
.
union
(
*
[
set
(
r
.
keys
())
for
r
in
trial_records
]))
writer
=
csv
.
DictWriter
(
file
,
set
.
union
(
*
[
set
(
r
.
keys
())
for
r
in
trial_records
]))
writer
.
writeheader
()
writer
.
writeheader
()
writer
.
writerows
(
trial_records
)
writer
.
writerows
(
trial_records
)
...
...
tools/nni_gpu_tool/gpu_metrics_collector.py
View file @
e29b58a1
...
@@ -10,27 +10,31 @@ import traceback
...
@@ -10,27 +10,31 @@ import traceback
from
xml.dom
import
minidom
from
xml.dom
import
minidom
def
check_ready_to_run
():
def
check_ready_to_run
():
if
sys
.
platform
==
'win32'
:
if
sys
.
platform
==
'win32'
:
pgrep_output
=
subprocess
.
check_output
(
pgrep_output
=
subprocess
.
check_output
(
'wmic process where "CommandLine like
\'
%nni_gpu_tool.gpu_metrics_collector%
\'
and name like
\'
%python%
\'
" get processId'
)
'wmic process where "CommandLine like
\'
%nni_gpu_tool.gpu_metrics_collector%
\'
and name like
\'
%python%
\'
" get processId'
)
pidList
=
pgrep_output
.
decode
(
"utf-8"
).
strip
().
split
()
pidList
=
pgrep_output
.
decode
(
"utf-8"
).
strip
().
split
()
pidList
.
pop
(
0
)
# remove the key word 'ProcessId'
pidList
.
pop
(
0
)
# remove the key word 'ProcessId'
pidList
=
list
(
map
(
int
,
pidList
))
pidList
=
list
(
map
(
int
,
pidList
))
pidList
.
remove
(
os
.
getpid
())
pidList
.
remove
(
os
.
getpid
())
return
not
pidList
return
not
pidList
else
:
else
:
pgrep_output
=
subprocess
.
check_output
(
'pgrep -f
x
u "$(whoami)"
\'
python3 -m nni_gpu_tool.gpu_metrics_collector
\'
'
,
shell
=
True
)
pgrep_output
=
subprocess
.
check_output
(
'pgrep -
a
fu "$(whoami)"
\'
python3 -m nni_gpu_tool.gpu_metrics_collector
\'
'
,
shell
=
True
)
pidList
=
[]
pidList
=
[]
for
pid
in
pgrep_output
.
splitlines
():
for
pid
in
pgrep_output
.
splitlines
():
pidList
.
append
(
int
(
pid
))
pid
=
pid
.
decode
()
pidList
.
remove
(
os
.
getpid
())
if
"pgrep "
in
pid
or
pid
.
startswith
(
'%s '
%
os
.
getpid
())
or
pid
.
startswith
(
'%s '
%
os
.
getppid
()):
continue
pidList
.
append
(
pid
)
return
not
pidList
return
not
pidList
def
main
(
argv
):
def
main
(
argv
):
metrics_output_dir
=
os
.
environ
[
'METRIC_OUTPUT_DIR'
]
metrics_output_dir
=
os
.
environ
[
'METRIC_OUTPUT_DIR'
]
if
check_ready_to_run
()
==
False
:
if
check_ready_to_run
()
==
False
:
#
GPU metrics collector is already running.
E
xit
print
(
"
GPU metrics collector is already running.
e
xit
ing..."
)
exit
(
2
)
exit
(
2
)
cmd
=
'nvidia-smi -q -x'
.
split
()
cmd
=
'nvidia-smi -q -x'
.
split
()
while
(
True
):
while
(
True
):
...
@@ -44,6 +48,7 @@ def main(argv):
...
@@ -44,6 +48,7 @@ def main(argv):
# TODO: change to sleep time configurable via arguments
# TODO: change to sleep time configurable via arguments
time
.
sleep
(
5
)
time
.
sleep
(
5
)
def
parse_nvidia_smi_result
(
smi
,
outputDir
):
def
parse_nvidia_smi_result
(
smi
,
outputDir
):
try
:
try
:
old_umask
=
os
.
umask
(
0
)
old_umask
=
os
.
umask
(
0
)
...
@@ -70,13 +75,14 @@ def parse_nvidia_smi_result(smi, outputDir):
...
@@ -70,13 +75,14 @@ def parse_nvidia_smi_result(smi, outputDir):
outPut
[
"gpuInfos"
].
append
(
gpuInfo
)
outPut
[
"gpuInfos"
].
append
(
gpuInfo
)
print
(
outPut
)
print
(
outPut
)
outputFile
.
write
(
"{}
\n
"
.
format
(
json
.
dumps
(
outPut
,
sort_keys
=
True
)))
outputFile
.
write
(
"{}
\n
"
.
format
(
json
.
dumps
(
outPut
,
sort_keys
=
True
)))
outputFile
.
flush
()
;
outputFile
.
flush
()
except
:
except
Exception
as
error
:
# e_info = sys.exc_info()
# e_info = sys.exc_info()
print
(
'
xmldoc paring error'
)
print
(
'
gpu_metrics_collector error: %s'
%
error
)
finally
:
finally
:
os
.
umask
(
old_umask
)
os
.
umask
(
old_umask
)
def
gen_empty_gpu_metric
(
outputDir
):
def
gen_empty_gpu_metric
(
outputDir
):
try
:
try
:
old_umask
=
os
.
umask
(
0
)
old_umask
=
os
.
umask
(
0
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment