OpenDAS / nni · Commit e29b58a1 (unverified)

Authored Apr 30, 2020 by SparkSnail, committed by GitHub on Apr 30, 2020.
Parents: e0c2c0eb, 4f88be1f

Merge pull request #244 from microsoft/master

merge master

Changes: 36 in total; this page shows 16 changed files with 300 additions and 288 deletions (+300 -288).
Changed files:

src/nni_manager/training_service/remote_machine/sshClientUtility.ts  +0 -154
src/nni_manager/training_service/remote_machine/test/linuxCommands.test.ts  +103 -0
src/nni_manager/training_service/remote_machine/test/shellExecutor.test.ts  +86 -0
src/nni_manager/training_service/test/sshClientUtility.test.ts  +0 -97
src/sdk/pynni/nni/nas/pytorch/base_mutator.py  +4 -2
src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py  +1 -1
src/sdk/pynni/nni/nas/pytorch/mutables.py  +35 -10
src/sdk/pynni/nni/nas/pytorch/mutator.py  +8 -6
src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py  +5 -3
src/webui/yarn.lock  +1 -1
test/config/integration_tests.yml  +16 -0
test/nni_test/nnitest/validators.py  +20 -0
test/pipelines/pipelines-it-pai-windows.yml  +1 -1
test/pipelines/pipelines-it-pai.yml  +1 -1
tools/nni_cmd/nnictl_utils.py  +5 -4
tools/nni_gpu_tool/gpu_metrics_collector.py  +14 -8
src/nni_manager/training_service/remote_machine/sshClientUtility.ts (deleted, 100644 → 0)

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import * as assert from 'assert';
import * as os from 'os';
import * as path from 'path';
import { Client, ClientChannel, SFTPWrapper } from 'ssh2';
import * as stream from 'stream';
import { Deferred } from 'ts-deferred';
import { NNIError, NNIErrorNames } from '../../common/errors';
import { getLogger, Logger } from '../../common/log';
import { getRemoteTmpDir, uniqueString, unixPathJoin } from '../../common/utils';
import { execRemove, tarAdd } from '../common/util';
import { RemoteCommandResult } from './remoteMachineData';

/**
 * Utility for frequent operations towards SSH client
 */
export namespace SSHClientUtility {
    /**
     * Copy local file to remote path
     * @param localFilePath the path of local file
     * @param remoteFilePath the target path in remote machine
     * @param sshClient SSH Client
     */
    export function copyFileToRemote(localFilePath: string, remoteFilePath: string, sshClient: Client): Promise<boolean> {
        const log: Logger = getLogger();
        log.debug(`copyFileToRemote: localFilePath: ${localFilePath}, remoteFilePath: ${remoteFilePath}`);
        assert(sshClient !== undefined);
        const deferred: Deferred<boolean> = new Deferred<boolean>();
        sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
            if (err !== undefined && err !== null) {
                log.error(`copyFileToRemote: ${err.message}, ${localFilePath}, ${remoteFilePath}`);
                deferred.reject(err);

                return;
            }
            assert(sftp !== undefined);
            sftp.fastPut(localFilePath, remoteFilePath, (fastPutErr: Error) => {
                sftp.end();
                if (fastPutErr !== undefined && fastPutErr !== null) {
                    deferred.reject(fastPutErr);
                } else {
                    deferred.resolve(true);
                }
            });
        });

        return deferred.promise;
    }

    /**
     * Execute command on remote machine
     * @param command the command to execute remotely
     * @param client SSH Client
     */
    export function remoteExeCommand(command: string, client: Client): Promise<RemoteCommandResult> {
        const log: Logger = getLogger();
        log.debug(`remoteExeCommand: command: [${command}]`);
        const deferred: Deferred<RemoteCommandResult> = new Deferred<RemoteCommandResult>();
        let stdout: string = '';
        let stderr: string = '';
        let exitCode: number;

        client.exec(command, (err: Error, channel: ClientChannel) => {
            if (err !== undefined && err !== null) {
                log.error(`remoteExeCommand: ${err.message}`);
                deferred.reject(err);

                return;
            }

            channel.on('data', (data: any, dataStderr: any) => {
                if (dataStderr !== undefined && dataStderr !== null) {
                    stderr += data.toString();
                } else {
                    stdout += data.toString();
                }
            })
            .on('exit', (code: any, signal: any) => {
                exitCode = <number>code;
                deferred.resolve({ stdout: stdout, stderr: stderr, exitCode: exitCode });
            });
        });

        return deferred.promise;
    }

    /**
     * Copy files and directories in local directory recursively to remote directory
     * @param localDirectory local diretory
     * @param remoteDirectory remote directory
     * @param sshClient SSH client
     */
    export async function copyDirectoryToRemote(localDirectory: string, remoteDirectory: string, sshClient: Client, remoteOS: string): Promise<void> {
        const tmpSuffix: string = uniqueString(5);
        const localTarPath: string = path.join(os.tmpdir(), `nni_tmp_local_${tmpSuffix}.tar.gz`);
        const remoteTarPath: string = unixPathJoin(getRemoteTmpDir(remoteOS), `nni_tmp_remote_${tmpSuffix}.tar.gz`);

        // Compress files in local directory to experiment root directory
        await tarAdd(localTarPath, localDirectory);
        // Copy the compressed file to remoteDirectory and delete it
        await copyFileToRemote(localTarPath, remoteTarPath, sshClient);
        await execRemove(localTarPath);
        // Decompress the remote compressed file in and delete it
        await remoteExeCommand(`tar -oxzf ${remoteTarPath} -C ${remoteDirectory}`, sshClient);
        await remoteExeCommand(`rm ${remoteTarPath}`, sshClient);
    }

    export function getRemoteFileContent(filePath: string, sshClient: Client): Promise<string> {
        const deferred: Deferred<string> = new Deferred<string>();
        sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
            if (err !== undefined && err !== null) {
                getLogger().error(`getRemoteFileContent: ${err.message}`);
                deferred.reject(new Error(`SFTP error: ${err.message}`));

                return;
            }
            try {
                const sftpStream: stream.Readable = sftp.createReadStream(filePath);

                let dataBuffer: string = '';
                sftpStream.on('data', (data: Buffer | string) => {
                    dataBuffer += data;
                })
                .on('error', (streamErr: Error) => {
                    sftp.end();
                    deferred.reject(new NNIError(NNIErrorNames.NOT_FOUND, streamErr.message));
                })
                .on('end', () => {
                    // sftp connection need to be released manually once operation is done
                    sftp.end();
                    deferred.resolve(dataBuffer);
                });
            } catch (error) {
                getLogger().error(`getRemoteFileContent: ${error.message}`);
                sftp.end();
                deferred.reject(new Error(`SFTP error: ${error.message}`));
            }
        });

        return deferred.promise;
    }
}
src/nni_manager/training_service/remote_machine/test/linuxCommands.test.ts (new file, 0 → 100644)

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import * as chai from 'chai';
import * as chaiAsPromised from 'chai-as-promised';
import * as component from '../../../common/component';
import { cleanupUnitTest, prepareUnitTest } from '../../../common/utils';
import { LinuxCommands } from '../extends/linuxCommands';
// import { TrialConfigMetadataKey } from '../trialConfigMetadataKey';

describe('Unit Test for linuxCommands', () => {

    let linuxCommands: LinuxCommands

    before(() => {
        chai.should();
        chai.use(chaiAsPromised);
        prepareUnitTest();
    });

    after(() => {
        cleanupUnitTest();
    });

    beforeEach(() => {
        linuxCommands = component.get(LinuxCommands);
    });

    afterEach(() => {
    });

    it('joinPath', async () => {
        chai.expect(linuxCommands.joinPath("/root/", "/first")).to.equal("/root/first");
        chai.expect(linuxCommands.joinPath("/root", "first")).to.equal("/root/first");
        chai.expect(linuxCommands.joinPath("/root/", "first")).to.equal("/root/first");
        chai.expect(linuxCommands.joinPath("root/", "first")).to.equal("root/first");
        chai.expect(linuxCommands.joinPath("root/")).to.equal("root/");
        chai.expect(linuxCommands.joinPath("root")).to.equal("root");
        chai.expect(linuxCommands.joinPath("./root")).to.equal("./root");
        chai.expect(linuxCommands.joinPath("")).to.equal(".");
        chai.expect(linuxCommands.joinPath("..")).to.equal("..");
    })

    it('createFolder', async () => {
        chai.expect(linuxCommands.createFolder("test")).to.equal("mkdir -p 'test'");
        chai.expect(linuxCommands.createFolder("test", true)).to.equal("umask 0; mkdir -p 'test'");
    })

    it('allowPermission', async () => {
        chai.expect(linuxCommands.allowPermission(true, "test", "test1")).to.equal("chmod 777 -R 'test' 'test1'");
        chai.expect(linuxCommands.allowPermission(false, "test")).to.equal("chmod 777 'test'");
    })

    it('removeFolder', async () => {
        chai.expect(linuxCommands.removeFolder("test")).to.equal("rm -df 'test'");
        chai.expect(linuxCommands.removeFolder("test", true)).to.equal("rm -rf 'test'");
        chai.expect(linuxCommands.removeFolder("test", true, false)).to.equal("rm -r 'test'");
        chai.expect(linuxCommands.removeFolder("test", false, false)).to.equal("rm 'test'");
    })

    it('removeFiles', async () => {
        chai.expect(linuxCommands.removeFiles("test", "*.sh")).to.equal("rm 'test/*.sh'");
        chai.expect(linuxCommands.removeFiles("test", "")).to.equal("rm 'test'");
    })

    it('readLastLines', async () => {
        chai.expect(linuxCommands.readLastLines("test", 3)).to.equal("tail -n 3 'test'");
    })

    it('isProcessAlive', async () => {
        chai.expect(linuxCommands.isProcessAliveCommand("test")).to.equal("kill -0 `cat 'test'`");
        chai.expect(linuxCommands.isProcessAliveProcessOutput({ exitCode: 0, stdout: "", stderr: "" })).to.equal(true);
        chai.expect(linuxCommands.isProcessAliveProcessOutput({ exitCode: 10, stdout: "", stderr: "" })).to.equal(false);
    })

    it('killChildProcesses', async () => {
        chai.expect(linuxCommands.killChildProcesses("test")).to.equal("pkill -P `cat 'test'`");
    })

    it('extractFile', async () => {
        chai.expect(linuxCommands.extractFile("test.tar", "testfolder")).to.equal("tar -oxzf 'test.tar' -C 'testfolder'");
    })

    it('executeScript', async () => {
        chai.expect(linuxCommands.executeScript("test.sh", true)).to.equal("bash 'test.sh'");
        chai.expect(linuxCommands.executeScript("test script'\"", false)).to.equal(`bash -c \"test script'\\""`);
    })
});
src/nni_manager/training_service/remote_machine/test/shellExecutor.test.ts (new file, 0 → 100644)

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import * as cpp from 'child-process-promise';
import * as fs from 'fs';
import * as chai from 'chai';
import * as chaiAsPromised from 'chai-as-promised';
import { Client } from 'ssh2';
import { ShellExecutor } from '../shellExecutor';
import { prepareUnitTest, cleanupUnitTest } from '../../../common/utils';

const LOCALFILE: string = '/tmp/localSshclientUTData';
const REMOTEFILE: string = '/tmp/remoteSshclientUTData';
const REMOTEFOLDER: string = '/tmp/remoteSshclientUTFolder';

async function copyFile(executor: ShellExecutor): Promise<void> {
    await executor.copyFileToRemote(LOCALFILE, REMOTEFILE);
}

async function copyFileToRemoteLoop(executor: ShellExecutor): Promise<void> {
    for (let i: number = 0; i < 10; i++) {
        // console.log(i);
        await executor.copyFileToRemote(LOCALFILE, REMOTEFILE);
    }
}

async function getRemoteFileContentLoop(executor: ShellExecutor): Promise<void> {
    for (let i: number = 0; i < 10; i++) {
        // console.log(i);
        await executor.getRemoteFileContent(REMOTEFILE);
    }
}

describe('ShellExecutor test', () => {
    let skip: boolean = false;
    let rmMeta: any;
    try {
        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
        console.log(rmMeta);
    } catch (err) {
        console.log(`Please configure rminfo.json to enable remote machine test. ${err}`);
        skip = true;
    }

    before(async () => {
        chai.should();
        chai.use(chaiAsPromised);
        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
        prepareUnitTest();
    });

    after(() => {
        cleanupUnitTest();
        fs.unlinkSync(LOCALFILE);
    });

    it('Test mkdir', async () => {
        if (skip) {
            return;
        }
        const shellExecutor: ShellExecutor = new ShellExecutor();
        await shellExecutor.initialize(rmMeta);
        let result = await shellExecutor.createFolder(REMOTEFOLDER, false);
        chai.expect(result).eq(true);
        result = await shellExecutor.removeFolder(REMOTEFOLDER);
        chai.expect(result).eq(true);
    });

    it('Test ShellExecutor', async () => {
        if (skip) {
            return;
        }
        const shellExecutor: ShellExecutor = new ShellExecutor();
        await shellExecutor.initialize(rmMeta);
        await copyFile(shellExecutor);
        await Promise.all([
            copyFileToRemoteLoop(shellExecutor),
            copyFileToRemoteLoop(shellExecutor),
            copyFileToRemoteLoop(shellExecutor),
            getRemoteFileContentLoop(shellExecutor)
        ]);
    });
});
src/nni_manager/training_service/test/sshClientUtility.test.ts (deleted, 100644 → 0)

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

'use strict';

import * as cpp from 'child-process-promise';
import * as fs from 'fs';
import { Client } from 'ssh2';
import { Deferred } from 'ts-deferred';
import { SSHClientUtility } from '../remote_machine/sshClientUtility';

const LOCALFILE: string = '/tmp/sshclientUTData';
const REMOTEFILE: string = '/tmp/sshclientUTData';

async function copyFile(conn: Client): Promise<void> {
    const deferred: Deferred<void> = new Deferred<void>();
    conn.sftp((err, sftp) => {
        if (err) {
            deferred.reject(err);

            return;
        }
        sftp.fastPut(LOCALFILE, REMOTEFILE, (fastPutErr: Error) => {
            sftp.end();
            if (fastPutErr) {
                deferred.reject(fastPutErr);
            } else {
                deferred.resolve();
            }
        });
    });

    return deferred.promise;
}

async function copyFileToRemoteLoop(conn: Client): Promise<void> {
    for (let i: number = 0; i < 500; i++) {
        console.log(i);
        await SSHClientUtility.copyFileToRemote(LOCALFILE, REMOTEFILE, conn);
    }
}

async function remoteExeCommandLoop(conn: Client): Promise<void> {
    for (let i: number = 0; i < 500; i++) {
        console.log(i);
        await SSHClientUtility.remoteExeCommand('ls', conn);
    }
}

async function getRemoteFileContentLoop(conn: Client): Promise<void> {
    for (let i: number = 0; i < 500; i++) {
        console.log(i);
        await SSHClientUtility.getRemoteFileContent(REMOTEFILE, conn);
    }
}

describe('sshClientUtility test', () => {
    let skip: boolean = true;
    let rmMeta: any;
    try {
        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
    } catch (err) {
        skip = true;
    }

    before(async () => {
        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
    });

    after(() => {
        fs.unlinkSync(LOCALFILE);
    });

    it('Test SSHClientUtility', (done) => {
        if (skip) {
            done();

            return;
        }
        const conn: Client = new Client();
        conn.on('ready', async () => {
            await copyFile(conn);
            await Promise.all([
                copyFileToRemoteLoop(conn),
                copyFileToRemoteLoop(conn),
                copyFileToRemoteLoop(conn),
                remoteExeCommandLoop(conn),
                getRemoteFileContentLoop(conn)
            ]);
            done();
        }).connect(rmMeta);
    });
});
src/sdk/pynni/nni/nas/pytorch/base_mutator.py

@@ -104,7 +104,7 @@ class BaseMutator(nn.Module):
         """
         pass
 
-    def on_forward_layer_choice(self, mutable, *inputs):
+    def on_forward_layer_choice(self, mutable, *args, **kwargs):
         """
         Callbacks of forward in LayerChoice.
@@ -112,8 +112,10 @@ class BaseMutator(nn.Module):
         ----------
         mutable : LayerChoice
             Module whose forward is called.
-        inputs : list of torch.Tensor
+        args : list of torch.Tensor
             The arguments of its forward function.
+        kwargs : dict
+            The keyword arguments of its forward function.
 
         Returns
         -------
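This signature change forwards positional and keyword arguments of the wrapped module through the mutator callback unchanged. A minimal sketch of a custom mutator written against the new callback signature; the subclass, its sum-everything policy, and the import path are illustrative assumptions, not part of this commit:

    import torch
    from nni.nas.pytorch.base_mutator import BaseMutator  # assumed import path (src/sdk/pynni layout)

    class SumAllMutator(BaseMutator):
        # Hypothetical mutator: runs every candidate op and sums the outputs.
        def on_forward_layer_choice(self, mutable, *args, **kwargs):
            # args/kwargs arrive verbatim from LayerChoice.forward.
            outputs = [op(*args, **kwargs) for op in mutable.choices]
            # Return (output, selection mask), matching what LayerChoice.forward unpacks.
            return sum(outputs), torch.ones(len(mutable.choices))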
src/sdk/pynni/nni/nas/pytorch/classic_nas/mutator.py

@@ -203,7 +203,7 @@ class ClassicMutator(Mutator):
             # for now we only generate flattened search space
             if isinstance(mutable, LayerChoice):
                 key = mutable.key
-                val = [repr(choice) for choice in mutable.choices]
+                val = mutable.names
                 search_space[key] = {"_type": LAYER_CHOICE, "_value": val}
             elif isinstance(mutable, InputChoice):
                 key = mutable.key
src/sdk/pynni/nni/nas/pytorch/mutables.py

@@ -2,6 +2,7 @@
 # Licensed under the MIT license.
 
 import logging
+from collections import OrderedDict
 
 import torch.nn as nn
@@ -58,9 +59,6 @@ class Mutable(nn.Module):
                                "Or did you apply multiple fixed architectures?")
         self.__dict__["mutator"] = mutator
 
-    def forward(self, *inputs):
-        raise NotImplementedError
-
     @property
     def key(self):
         """
@@ -86,9 +84,6 @@ class Mutable(nn.Module):
                            "Or did you initialize a mutable on the fly in forward pass? Move to `__init__` "
                            "so that trainer can locate all your mutables. See NNI docs for more details.".format(self))
 
-    def __repr__(self):
-        return "{} ({})".format(self.name, self.key)
-
 
 class MutableScope(Mutable):
     """
@@ -131,7 +126,7 @@ class LayerChoice(Mutable):
     Parameters
     ----------
-    op_candidates : list of nn.Module
+    op_candidates : list of nn.Module or OrderedDict
         A module list to be selected from.
     reduction : str
         ``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected.
@@ -146,23 +141,53 @@ class LayerChoice(Mutable):
     ----------
     length : int
         Number of ops to choose from.
+    names: list of str
+        Names of candidates.
+
+    Notes
+    -----
+    ``op_candidates`` can be a list of modules or a ordered dict of named modules, for example,
+
+    .. code-block:: python
+
+        self.op_choice = LayerChoice(OrderedDict([
+            ("conv3x3", nn.Conv2d(3, 16, 128)),
+            ("conv5x5", nn.Conv2d(5, 16, 128)),
+            ("conv7x7", nn.Conv2d(7, 16, 128))
+        ]))
     """
 
     def __init__(self, op_candidates, reduction="sum", return_mask=False, key=None):
         super().__init__(key=key)
         self.length = len(op_candidates)
-        self.choices = nn.ModuleList(op_candidates)
+        self.choices = []
+        self.names = []
+        if isinstance(op_candidates, OrderedDict):
+            for name, module in op_candidates.items():
+                assert name not in ["length", "reduction", "return_mask", "_key", "key", "names"], \
+                    "Please don't use a reserved name '{}' for your module.".format(name)
+                self.add_module(name, module)
+                self.choices.append(module)
+                self.names.append(name)
+        elif isinstance(op_candidates, list):
+            for i, module in enumerate(op_candidates):
+                self.add_module(str(i), module)
+                self.choices.append(module)
+                self.names.append(str(i))
+        else:
+            raise TypeError("Unsupported op_candidates type: {}".format(type(op_candidates)))
         self.reduction = reduction
         self.return_mask = return_mask
 
-    def forward(self, *inputs):
+    def forward(self, *args, **kwargs):
         """
         Returns
         -------
         tuple of tensors
             Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
         """
-        out, mask = self.mutator.on_forward_layer_choice(self, *inputs)
+        out, mask = self.mutator.on_forward_layer_choice(self, *args, **kwargs)
         if self.return_mask:
             return out, mask
         return out
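A short usage sketch of the extended LayerChoice constructor; the convolution shapes are made up and the import path assumes this repository's src/sdk/pynni package layout:

    from collections import OrderedDict
    import torch.nn as nn
    from nni.nas.pytorch.mutables import LayerChoice  # assumed import path

    # Named candidates: the OrderedDict keys become the entries of `.names`
    # and are also registered as submodule names via add_module.
    named_choice = LayerChoice(OrderedDict([
        ("conv3x3", nn.Conv2d(16, 16, 3, padding=1)),
        ("conv5x5", nn.Conv2d(16, 16, 5, padding=2)),
    ]))
    print(named_choice.names)    # ['conv3x3', 'conv5x5']

    # Plain list: candidates are named by their index.
    indexed_choice = LayerChoice([nn.ReLU(), nn.Sigmoid()])
    print(indexed_choice.names)  # ['0', '1']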
src/sdk/pynni/nni/nas/pytorch/mutator.py

@@ -128,7 +128,7 @@ class Mutator(BaseMutator):
                 result["mutable"][mutable.key].append(path)
         return result
 
-    def on_forward_layer_choice(self, mutable, *inputs):
+    def on_forward_layer_choice(self, mutable, *args, **kwargs):
         """
         On default, this method retrieves the decision obtained previously, and select certain operations.
         Only operations with non-zero weight will be executed. The results will be added to a list.
@@ -138,7 +138,9 @@ class Mutator(BaseMutator):
         ----------
         mutable : LayerChoice
             Layer choice module.
-        inputs : list of torch.Tensor
+        args : list of torch.Tensor
+            Inputs
+        kwargs : dict
             Inputs
 
         Returns
@@ -148,16 +150,16 @@ class Mutator(BaseMutator):
         """
         if self._connect_all:
             return self._all_connect_tensor_reduction(mutable.reduction,
-                                                      [op(*inputs) for op in mutable.choices]), \
+                                                      [op(*args, **kwargs) for op in mutable.choices]), \
                    torch.ones(mutable.length)
 
-        def _map_fn(op, *inputs):
-            return op(*inputs)
+        def _map_fn(op, args, kwargs):
+            return op(*args, **kwargs)
 
         mask = self._get_decision(mutable)
         assert len(mask) == len(mutable.choices), \
             "Invalid mask, expected {} to be of length {}.".format(mask, len(mutable.choices))
-        out = self._select_with_mask(_map_fn, [(choice, *inputs) for choice in mutable.choices], mask)
+        out = self._select_with_mask(_map_fn, [(choice, args, kwargs) for choice in mutable.choices], mask)
         return self._tensor_reduction(mutable.reduction, out), mask
 
     def on_forward_input_choice(self, mutable, tensor_list):
src/sdk/pynni/nni/nas/pytorch/proxylessnas/mutator.py

@@ -317,7 +317,7 @@ class ProxylessNasMutator(BaseMutator):
             self.mutable_list.append(mutable)
             mutable.registered_module = MixedOp(mutable)
 
-    def on_forward_layer_choice(self, mutable, *inputs):
+    def on_forward_layer_choice(self, mutable, *args, **kwargs):
         """
         Callback of layer choice forward. This function defines the forward
         logic of the input mutable. So mutable is only interface, its real
@@ -327,7 +327,9 @@ class ProxylessNasMutator(BaseMutator):
         ----------
         mutable: LayerChoice
             forward logic of this input mutable
-        inputs: list of torch.Tensor
+        args: list of torch.Tensor
+            inputs of this mutable
+        kwargs: dict
             inputs of this mutable
 
         Returns
@@ -339,7 +341,7 @@ class ProxylessNasMutator(BaseMutator):
         """
         # FIXME: return mask, to be consistent with other algorithms
         idx = mutable.registered_module.active_op_index
-        return mutable.registered_module(mutable, *inputs), idx
+        return mutable.registered_module(mutable, *args, **kwargs), idx
 
     def reset_binary_gates(self):
         """
src/webui/yarn.lock

@@ -5593,7 +5593,7 @@ load-json-file@^4.0.0:
     pify "^3.0.0"
     strip-bom "^3.0.0"
 
-loader-fs-cache@>=1.0.3, loader-fs-cache@^1.0.0:
+loader-fs-cache@^1.0.0:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/loader-fs-cache/-/loader-fs-cache-1.0.3.tgz#f08657646d607078be2f0a032f8bd69dd6f277d9"
   integrity sha512-ldcgZpjNJj71n+2Mf6yetz+c9bM4xpKtNds4LbqXzU/PTdeAX0g3ytnU1AJMEcTk2Lex4Smpe3Q/eCTsvUBxbA==
test/config/integration_tests.yml

@@ -77,6 +77,14 @@ testCases:
     kwargs:
       expected_result_file: expected_metrics.json
 
+- name: export-float
+  configFile: test/config/metrics_test/config.yml
+  config:
+    maxTrialNum: 1
+    trialConcurrency: 1
+  validator:
+    class: ExportValidator
+
 - name: metrics-dict
   configFile: test/config/metrics_test/config_dict_metrics.yml
   config:
@@ -87,6 +95,14 @@ testCases:
     kwargs:
       expected_result_file: expected_metrics_dict.json
 
+- name: export-dict
+  configFile: test/config/metrics_test/config_dict_metrics.yml
+  config:
+    maxTrialNum: 1
+    trialConcurrency: 1
+  validator:
+    class: ExportValidator
+
 - name: nnicli
   configFile: test/config/examples/sklearn-regression.yml
   config:
test/nni_test/nnitest/validators.py

@@ -2,6 +2,8 @@
 # Licensed under the MIT license.
 
 import os.path as osp
+from os import remove
+import subprocess
 import json
 import requests
 import nnicli as nc
@@ -12,6 +14,24 @@ class ITValidator:
     def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
         pass
 
+class ExportValidator(ITValidator):
+    def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
+        exp_id = osp.split(experiment_dir)[-1]
+        proc1 = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", "csv", "-f", "report.csv"])
+        assert proc1.returncode == 0, '`nnictl experiment export -t csv` failed with code %d' % proc1.returncode
+        with open("report.csv", 'r') as f:
+            print('Exported CSV file: \n')
+            print(''.join(f.readlines()))
+            print('\n\n')
+        remove('report.csv')
+
+        proc2 = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", "json", "-f", "report.json"])
+        assert proc2.returncode == 0, '`nnictl experiment export -t json` failed with code %d' % proc2.returncode
+        with open("report.json", 'r') as f:
+            print('Exported JSON file: \n')
+            print('\n'.join(f.readlines()))
+            print('\n\n')
+        remove('report.json')
+
 class MetricsValidator(ITValidator):
     def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
test/pipelines/pipelines-it-pai-windows.yml

@@ -70,5 +70,5 @@ jobs:
       python --version
       mount -o anon $(pai_nfs_uri) $(local_nfs_uri)
       python nni_test/nnitest/generate_ts_config.py --ts pai --pai_token $(pai_token) --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $(docker_image) --pai_storage_plugin $(pai_storage_plugin) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
-      python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase
+      python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
     displayName: 'Examples and advanced features tests on pai'
\ No newline at end of file
test/pipelines/pipelines-it-pai.yml

@@ -57,5 +57,5 @@ jobs:
       cd test
       python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
       --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
-      PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase
+      PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
     displayName: 'integration test'
tools/nni_cmd/nnictl_utils.py

@@ -699,12 +699,13 @@ def export_trials_data(args):
         content = json.loads(response.text)
         trial_records = []
         for record in content:
-            if not isinstance(record['value'], (float, int)):
-                formated_record = {**record['parameter'], **record['value'], **{'id': record['id']}}
+            record_value = json.loads(record['value'])
+            if not isinstance(record_value, (float, int)):
+                formated_record = {**record['parameter'], **record_value, **{'id': record['id']}}
             else:
-                formated_record = {**record['parameter'], **{'reward': record['value'], 'id': record['id']}}
+                formated_record = {**record['parameter'], **{'reward': record_value, 'id': record['id']}}
             trial_records.append(formated_record)
-        with open(args.path, 'w') as file:
+        with open(args.path, 'w', newline='') as file:
             writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records]))
             writer.writeheader()
             writer.writerows(trial_records)
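The export fix assumes each trial's final result arrives from the REST API as a JSON-encoded string, so it is decoded once and then merged into the CSV row whether it is a scalar reward or a dict of metrics. A small illustration of the two cases; the sample records below are made up:

    import json

    def format_record(record):
        # Mirrors the updated export logic: decode the JSON-encoded final result first.
        record_value = json.loads(record['value'])
        if not isinstance(record_value, (float, int)):
            return {**record['parameter'], **record_value, 'id': record['id']}
        return {**record['parameter'], 'reward': record_value, 'id': record['id']}

    print(format_record({'parameter': {'lr': 0.1}, 'value': '0.93', 'id': 'Ab3'}))
    # -> {'lr': 0.1, 'reward': 0.93, 'id': 'Ab3'}
    print(format_record({'parameter': {'lr': 0.1}, 'value': '{"default": 0.93, "loss": 0.2}', 'id': 'Ab3'}))
    # -> {'lr': 0.1, 'default': 0.93, 'loss': 0.2, 'id': 'Ab3'}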
tools/nni_gpu_tool/gpu_metrics_collector.py

@@ -10,27 +10,31 @@ import traceback
 from xml.dom import minidom
 
 
 def check_ready_to_run():
     if sys.platform == 'win32':
         pgrep_output = subprocess.check_output(
             'wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\' and name like \'%python%\'" get processId')
         pidList = pgrep_output.decode("utf-8").strip().split()
         pidList.pop(0)  # remove the key word 'ProcessId'
         pidList = list(map(int, pidList))
         pidList.remove(os.getpid())
         return not pidList
     else:
-        pgrep_output = subprocess.check_output('pgrep -fxu "$(whoami)" \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
+        pgrep_output = subprocess.check_output('pgrep -afu "$(whoami)" \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
         pidList = []
         for pid in pgrep_output.splitlines():
-            pidList.append(int(pid))
-        pidList.remove(os.getpid())
+            pid = pid.decode()
+            if "pgrep " in pid or pid.startswith('%s ' % os.getpid()) or pid.startswith('%s ' % os.getppid()):
+                continue
+            pidList.append(pid)
         return not pidList
 
 
 def main(argv):
     metrics_output_dir = os.environ['METRIC_OUTPUT_DIR']
     if check_ready_to_run() == False:
-        # GPU metrics collector is already running. Exit
+        print("GPU metrics collector is already running. exiting...")
         exit(2)
     cmd = 'nvidia-smi -q -x'.split()
     while(True):
@@ -44,6 +48,7 @@ def main(argv):
         # TODO: change to sleep time configurable via arguments
         time.sleep(5)
 
+
 def parse_nvidia_smi_result(smi, outputDir):
     try:
         old_umask = os.umask(0)
@@ -70,13 +75,14 @@ def parse_nvidia_smi_result(smi, outputDir):
             outPut["gpuInfos"].append(gpuInfo)
         print(outPut)
         outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True)))
-        outputFile.flush();
-    except:
+        outputFile.flush()
+    except Exception as error:
         # e_info = sys.exc_info()
-        print('xmldoc paring error')
+        print('gpu_metrics_collector error: %s' % error)
     finally:
         os.umask(old_umask)
 
+
 def gen_empty_gpu_metric(outputDir):
     try:
         old_umask = os.umask(0)
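The pgrep flags change from -fx (exact full-command match) to -af (list each match together with its full command line), and the new loop filters that listing instead of removing only the current pid. A rough sketch of the filtering on fabricated pgrep -af output; the pids and command lines below are made up:

    import os

    # Fabricated `pgrep -af` output: one "<pid> <command line>" entry per match.
    sample_output = (b"4242 python3 -m nni_gpu_tool.gpu_metrics_collector\n"
                     b"4300 pgrep -afu user python3 -m nni_gpu_tool.gpu_metrics_collector\n")

    pid_list = []
    for pid in sample_output.splitlines():
        pid = pid.decode()
        # Skip the pgrep invocation itself and this process / its parent, as the commit does.
        if "pgrep " in pid or pid.startswith('%s ' % os.getpid()) or pid.startswith('%s ' % os.getppid()):
            continue
        pid_list.append(pid)
    print(pid_list)  # only the other running collector instance remains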