"driver/device_direct_convolution_3.cuh" did not exist on "a5bcde36e3a53e6ee68ee48af96c7441f620f574"
Unverified Commit e29b58a1 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Merge pull request #244 from microsoft/master

merge master
parents e0c2c0eb 4f88be1f
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'use strict';
import * as assert from 'assert';
import * as os from 'os';
import * as path from 'path';
import { Client, ClientChannel, SFTPWrapper } from 'ssh2';
import * as stream from 'stream';
import { Deferred } from 'ts-deferred';
import { NNIError, NNIErrorNames } from '../../common/errors';
import { getLogger, Logger } from '../../common/log';
import { getRemoteTmpDir, uniqueString, unixPathJoin } from '../../common/utils';
import { execRemove, tarAdd } from '../common/util';
import { RemoteCommandResult } from './remoteMachineData';
/**
*
* Utility for frequent operations towards SSH client
*
*/
export namespace SSHClientUtility {
/**
* Copy a local file to a path on the remote machine over SFTP.
* @param localFilePath the path of local file
* @param remoteFilePath the target path in remote machine
* @param sshClient SSH Client (must already be connected)
* @returns promise resolving to true on success; rejects with the underlying error on failure
*/
export function copyFileToRemote(localFilePath: string, remoteFilePath: string, sshClient: Client): Promise<boolean> {
const log: Logger = getLogger();
log.debug(`copyFileToRemote: localFilePath: ${localFilePath}, remoteFilePath: ${remoteFilePath}`);
assert(sshClient !== undefined);
const deferred: Deferred<boolean> = new Deferred<boolean>();
sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
if (err !== undefined && err !== null) {
log.error(`copyFileToRemote: ${err.message}, ${localFilePath}, ${remoteFilePath}`);
deferred.reject(err);
return;
}
assert(sftp !== undefined);
sftp.fastPut(localFilePath, remoteFilePath, (fastPutErr: Error) => {
// The SFTP session must be released manually once the transfer settles.
sftp.end();
if (fastPutErr !== undefined && fastPutErr !== null) {
deferred.reject(fastPutErr);
} else {
deferred.resolve(true);
}
});
});
return deferred.promise;
}
/**
* Execute command on remote machine
* @param command the command to execute remotely
* @param client SSH Client
* @returns promise resolving to the command's stdout, stderr and exit code
*/
export function remoteExeCommand(command: string, client: Client): Promise<RemoteCommandResult> {
const log: Logger = getLogger();
log.debug(`remoteExeCommand: command: [${command}]`);
const deferred: Deferred<RemoteCommandResult> = new Deferred<RemoteCommandResult>();
let stdout: string = '';
let stderr: string = '';
let exitCode: number;
client.exec(command, (err: Error, channel: ClientChannel) => {
if (err !== undefined && err !== null) {
log.error(`remoteExeCommand: ${err.message}`);
deferred.reject(err);
return;
}
// NOTE(review): this relies on a legacy ssh2 'data' event signature
// (data, extendedType) to detect stderr output; recent ssh2 versions
// emit stderr on channel.stderr instead, which would leave dataStderr
// always undefined and misattribute stderr to stdout — confirm against
// the ssh2 version pinned in package.json.
channel.on('data', (data: any, dataStderr: any) => {
if (dataStderr !== undefined && dataStderr !== null) {
stderr += data.toString();
} else {
stdout += data.toString();
}
})
.on('exit', (code: any, signal: any) => {
exitCode = <number>code;
deferred.resolve({
stdout : stdout,
stderr : stderr,
exitCode : exitCode
});
});
});
return deferred.promise;
}
/**
* Copy files and directories in local directory recursively to remote directory
* @param localDirectory local directory
* @param remoteDirectory remote directory
* @param sshClient SSH client
* @param remoteOS flavor of the remote OS, used to locate its temp directory
*/
export async function copyDirectoryToRemote(localDirectory: string, remoteDirectory: string, sshClient: Client, remoteOS: string): Promise<void> {
// Random suffix so concurrent copies do not collide on the temp tarball names.
const tmpSuffix: string = uniqueString(5);
const localTarPath: string = path.join(os.tmpdir(), `nni_tmp_local_${tmpSuffix}.tar.gz`);
const remoteTarPath: string = unixPathJoin(getRemoteTmpDir(remoteOS), `nni_tmp_remote_${tmpSuffix}.tar.gz`);
// Compress files in local directory to experiment root directory
await tarAdd(localTarPath, localDirectory);
// Copy the compressed file to remoteDirectory and delete it
await copyFileToRemote(localTarPath, remoteTarPath, sshClient);
await execRemove(localTarPath);
// Decompress the remote compressed file in and delete it
await remoteExeCommand(`tar -oxzf ${remoteTarPath} -C ${remoteDirectory}`, sshClient);
await remoteExeCommand(`rm ${remoteTarPath}`, sshClient);
}
/**
* Read the full content of a file on the remote machine over SFTP.
* @param filePath path of the remote file to read
* @param sshClient SSH Client (must already be connected)
* @returns promise resolving to the file content as a string; rejects with
* NNIError(NOT_FOUND) when the read stream errors, or a generic Error for
* SFTP session failures
*/
export function getRemoteFileContent(filePath: string, sshClient: Client): Promise<string> {
const deferred: Deferred<string> = new Deferred<string>();
sshClient.sftp((err: Error, sftp: SFTPWrapper) => {
if (err !== undefined && err !== null) {
getLogger()
.error(`getRemoteFileContent: ${err.message}`);
deferred.reject(new Error(`SFTP error: ${err.message}`));
return;
}
try {
const sftpStream: stream.Readable = sftp.createReadStream(filePath);
let dataBuffer: string = '';
sftpStream.on('data', (data: Buffer | string) => {
dataBuffer += data;
})
.on('error', (streamErr: Error) => {
sftp.end();
deferred.reject(new NNIError(NNIErrorNames.NOT_FOUND, streamErr.message));
})
.on('end', () => {
// sftp connection need to be released manually once operation is done
sftp.end();
deferred.resolve(dataBuffer);
});
} catch (error) {
getLogger()
.error(`getRemoteFileContent: ${error.message}`);
sftp.end();
deferred.reject(new Error(`SFTP error: ${error.message}`));
}
});
return deferred.promise;
}
}
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'use strict';
import * as chai from 'chai';
import * as chaiAsPromised from 'chai-as-promised';
import * as component from '../../../common/component';
import { cleanupUnitTest, prepareUnitTest } from '../../../common/utils';
import { LinuxCommands } from '../extends/linuxCommands';
// import { TrialConfigMetadataKey } from '../trialConfigMetadataKey';
// Unit tests for the shell command builders and output parsers in LinuxCommands.
// Fix: added the statement-terminating semicolons missing after the
// `linuxCommands` declaration and after each `it(...)` call, for consistency
// with the semicolon style used throughout the rest of the file.
describe('Unit Test for linuxCommands', () => {
    let linuxCommands: LinuxCommands;
    before(() => {
        chai.should();
        chai.use(chaiAsPromised);
        prepareUnitTest();
    });
    after(() => {
        cleanupUnitTest();
    });
    beforeEach(() => {
        // Fresh instance per test so cases cannot leak state into each other.
        linuxCommands = component.get(LinuxCommands);
    });
    afterEach(() => {
    });
    it('joinPath', async () => {
        chai.expect(linuxCommands.joinPath("/root/", "/first")).to.equal("/root/first");
        chai.expect(linuxCommands.joinPath("/root", "first")).to.equal("/root/first");
        chai.expect(linuxCommands.joinPath("/root/", "first")).to.equal("/root/first");
        chai.expect(linuxCommands.joinPath("root/", "first")).to.equal("root/first");
        chai.expect(linuxCommands.joinPath("root/")).to.equal("root/");
        chai.expect(linuxCommands.joinPath("root")).to.equal("root");
        chai.expect(linuxCommands.joinPath("./root")).to.equal("./root");
        chai.expect(linuxCommands.joinPath("")).to.equal(".");
        chai.expect(linuxCommands.joinPath("..")).to.equal("..");
    });
    it('createFolder', async () => {
        chai.expect(linuxCommands.createFolder("test")).to.equal("mkdir -p 'test'");
        chai.expect(linuxCommands.createFolder("test", true)).to.equal("umask 0; mkdir -p 'test'");
    });
    it('allowPermission', async () => {
        chai.expect(linuxCommands.allowPermission(true, "test", "test1")).to.equal("chmod 777 -R 'test' 'test1'");
        chai.expect(linuxCommands.allowPermission(false, "test")).to.equal("chmod 777 'test'");
    });
    it('removeFolder', async () => {
        chai.expect(linuxCommands.removeFolder("test")).to.equal("rm -df 'test'");
        chai.expect(linuxCommands.removeFolder("test", true)).to.equal("rm -rf 'test'");
        chai.expect(linuxCommands.removeFolder("test", true, false)).to.equal("rm -r 'test'");
        chai.expect(linuxCommands.removeFolder("test", false, false)).to.equal("rm 'test'");
    });
    it('removeFiles', async () => {
        chai.expect(linuxCommands.removeFiles("test", "*.sh")).to.equal("rm 'test/*.sh'");
        chai.expect(linuxCommands.removeFiles("test", "")).to.equal("rm 'test'");
    });
    it('readLastLines', async () => {
        chai.expect(linuxCommands.readLastLines("test", 3)).to.equal("tail -n 3 'test'");
    });
    it('isProcessAlive', async () => {
        chai.expect(linuxCommands.isProcessAliveCommand("test")).to.equal("kill -0 `cat 'test'`");
        // Exit code 0 from `kill -0` means the target process exists.
        chai.expect(linuxCommands.isProcessAliveProcessOutput(
            {
                exitCode: 0,
                stdout: "",
                stderr: ""
            }
        )).to.equal(true);
        chai.expect(linuxCommands.isProcessAliveProcessOutput(
            {
                exitCode: 10,
                stdout: "",
                stderr: ""
            }
        )).to.equal(false);
    });
    it('killChildProcesses', async () => {
        chai.expect(linuxCommands.killChildProcesses("test")).to.equal("pkill -P `cat 'test'`");
    });
    it('extractFile', async () => {
        chai.expect(linuxCommands.extractFile("test.tar", "testfolder")).to.equal("tar -oxzf 'test.tar' -C 'testfolder'");
    });
    it('executeScript', async () => {
        chai.expect(linuxCommands.executeScript("test.sh", true)).to.equal("bash 'test.sh'");
        chai.expect(linuxCommands.executeScript("test script'\"", false)).to.equal(`bash -c \"test script'\\""`);
    });
});
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'use strict';
import * as cpp from 'child-process-promise';
import * as fs from 'fs';
import * as chai from 'chai';
import * as chaiAsPromised from 'chai-as-promised';
import { Client } from 'ssh2';
import { ShellExecutor } from '../shellExecutor';
import { prepareUnitTest, cleanupUnitTest } from '../../../common/utils';
// Shared test fixtures: a local source file, its remote copy target, and a
// scratch folder that is created and removed on the remote machine.
const LOCALFILE: string = '/tmp/localSshclientUTData';
const REMOTEFILE: string = '/tmp/remoteSshclientUTData';
const REMOTEFOLDER: string = '/tmp/remoteSshclientUTFolder';
/**
 * Upload the shared local fixture file to its remote test path once.
 */
async function copyFile(shellExecutor: ShellExecutor): Promise<void> {
    await shellExecutor.copyFileToRemote(LOCALFILE, REMOTEFILE);
}
/**
 * Upload the fixture file repeatedly to stress sequential SFTP transfers
 * over a single ShellExecutor.
 */
async function copyFileToRemoteLoop(shellExecutor: ShellExecutor): Promise<void> {
    const iterations: number = 10;
    for (let round: number = 0; round < iterations; round++) {
        await shellExecutor.copyFileToRemote(LOCALFILE, REMOTEFILE);
    }
}
/**
 * Read the remote fixture file repeatedly to stress sequential SFTP reads
 * over a single ShellExecutor.
 */
async function getRemoteFileContentLoop(shellExecutor: ShellExecutor): Promise<void> {
    const iterations: number = 10;
    for (let round: number = 0; round < iterations; round++) {
        await shellExecutor.getRemoteFileContent(REMOTEFILE);
    }
}
// Integration tests for ShellExecutor against a real remote machine.
// They silently skip unless ../../.vscode/rminfo.json supplies connection info.
describe('ShellExecutor test', () => {
let skip: boolean = false;
let rmMeta: any;
try {
rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
console.log(rmMeta);
} catch (err) {
console.log(`Please configure rminfo.json to enable remote machine test.${err}`);
skip = true;
}
before(async () => {
chai.should();
chai.use(chaiAsPromised);
// Create the local fixture file that will be copied to the remote machine.
await cpp.exec(`echo '1234' > ${LOCALFILE}`);
prepareUnitTest();
});
after(() => {
cleanupUnitTest();
fs.unlinkSync(LOCALFILE);
});
it('Test mkdir', async () => {
if (skip) {
return;
}
const shellExecutor: ShellExecutor = new ShellExecutor();
await shellExecutor.initialize(rmMeta);
// Round-trip: create the scratch folder, then remove it again.
let result = await shellExecutor.createFolder(REMOTEFOLDER, false);
chai.expect(result).eq(true);
result = await shellExecutor.removeFolder(REMOTEFOLDER);
chai.expect(result).eq(true);
});
it('Test ShellExecutor', async () => {
if (skip) {
return;
}
const shellExecutor: ShellExecutor = new ShellExecutor();
await shellExecutor.initialize(rmMeta);
await copyFile(shellExecutor);
// Exercise concurrent uploads and reads over one executor/connection.
await Promise.all([
copyFileToRemoteLoop(shellExecutor),
copyFileToRemoteLoop(shellExecutor),
copyFileToRemoteLoop(shellExecutor),
getRemoteFileContentLoop(shellExecutor)
]);
});
});
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
'use strict';
import * as cpp from 'child-process-promise';
import * as fs from 'fs';
import { Client } from 'ssh2';
import { Deferred } from 'ts-deferred';
import { SSHClientUtility } from '../remote_machine/sshClientUtility';
// Local fixture file and its remote destination. Note both constants hold the
// same absolute path string; the upload targets that path on the remote host.
const LOCALFILE: string = '/tmp/sshclientUTData';
const REMOTEFILE: string = '/tmp/sshclientUTData';
/**
 * Upload the local fixture file to the remote path by driving a raw ssh2
 * SFTP session directly (rather than going through SSHClientUtility).
 */
async function copyFile(conn: Client): Promise<void> {
    const deferred: Deferred<void> = new Deferred<void>();
    conn.sftp((sftpErr, sftp) => {
        if (sftpErr) {
            deferred.reject(sftpErr);
            return;
        }
        sftp.fastPut(LOCALFILE, REMOTEFILE, (putErr: Error) => {
            // Release the SFTP session before settling the promise.
            sftp.end();
            if (putErr) {
                deferred.reject(putErr);
            } else {
                deferred.resolve();
            }
        });
    });
    return deferred.promise;
}
/**
 * Stress SSHClientUtility.copyFileToRemote with 500 sequential uploads
 * over the shared connection, logging progress each round.
 */
async function copyFileToRemoteLoop(conn: Client): Promise<void> {
    const rounds: number = 500;
    for (let round: number = 0; round < rounds; round++) {
        console.log(round);
        await SSHClientUtility.copyFileToRemote(LOCALFILE, REMOTEFILE, conn);
    }
}
/**
 * Stress SSHClientUtility.remoteExeCommand with 500 sequential `ls`
 * invocations over the shared connection, logging progress each round.
 */
async function remoteExeCommandLoop(conn: Client): Promise<void> {
    const rounds: number = 500;
    for (let round: number = 0; round < rounds; round++) {
        console.log(round);
        await SSHClientUtility.remoteExeCommand('ls', conn);
    }
}
/**
 * Stress SSHClientUtility.getRemoteFileContent with 500 sequential reads
 * of the remote fixture file, logging progress each round.
 */
async function getRemoteFileContentLoop(conn: Client): Promise<void> {
    const rounds: number = 500;
    for (let round: number = 0; round < rounds; round++) {
        console.log(round);
        await SSHClientUtility.getRemoteFileContent(REMOTEFILE, conn);
    }
}
// Stress tests for SSHClientUtility against a real remote machine.
// They silently skip unless ../../.vscode/rminfo.json supplies SSH connection info.
describe('sshClientUtility test', () => {
    // BUG FIX: `skip` was initialized to true and only ever assigned true in the
    // catch branch, so the test body never ran even when rminfo.json existed.
    // Start at false and flip to true only when the config cannot be read
    // (matching the sibling ShellExecutor test suite).
    let skip: boolean = false;
    let rmMeta: any;
    try {
        rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
    } catch (err) {
        console.log(`Please configure rminfo.json to enable remote machine test.${err}`);
        skip = true;
    }
    before(async () => {
        // Create the local fixture file that will be uploaded repeatedly.
        await cpp.exec(`echo '1234' > ${LOCALFILE}`);
    });
    after(() => {
        fs.unlinkSync(LOCALFILE);
    });
    it('Test SSHClientUtility', (done) => {
        if (skip) {
            done();
            return;
        }
        const conn: Client = new Client();
        conn.on('ready', async () => {
            await copyFile(conn);
            // Run several transfer/exec loops concurrently over one connection
            // to verify channel multiplexing holds up under load.
            await Promise.all([
                copyFileToRemoteLoop(conn),
                copyFileToRemoteLoop(conn),
                copyFileToRemoteLoop(conn),
                remoteExeCommandLoop(conn),
                getRemoteFileContentLoop(conn)
            ]);
            done();
        }).connect(rmMeta);
    });
});
...@@ -104,7 +104,7 @@ class BaseMutator(nn.Module): ...@@ -104,7 +104,7 @@ class BaseMutator(nn.Module):
""" """
pass pass
def on_forward_layer_choice(self, mutable, *inputs): def on_forward_layer_choice(self, mutable, *args, **kwargs):
""" """
Callbacks of forward in LayerChoice. Callbacks of forward in LayerChoice.
...@@ -112,8 +112,10 @@ class BaseMutator(nn.Module): ...@@ -112,8 +112,10 @@ class BaseMutator(nn.Module):
---------- ----------
mutable : LayerChoice mutable : LayerChoice
Module whose forward is called. Module whose forward is called.
inputs : list of torch.Tensor args : list of torch.Tensor
The arguments of its forward function. The arguments of its forward function.
kwargs : dict
The keyword arguments of its forward function.
Returns Returns
------- -------
......
...@@ -203,7 +203,7 @@ class ClassicMutator(Mutator): ...@@ -203,7 +203,7 @@ class ClassicMutator(Mutator):
# for now we only generate flattened search space # for now we only generate flattened search space
if isinstance(mutable, LayerChoice): if isinstance(mutable, LayerChoice):
key = mutable.key key = mutable.key
val = [repr(choice) for choice in mutable.choices] val = mutable.names
search_space[key] = {"_type": LAYER_CHOICE, "_value": val} search_space[key] = {"_type": LAYER_CHOICE, "_value": val}
elif isinstance(mutable, InputChoice): elif isinstance(mutable, InputChoice):
key = mutable.key key = mutable.key
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import logging import logging
from collections import OrderedDict
import torch.nn as nn import torch.nn as nn
...@@ -58,9 +59,6 @@ class Mutable(nn.Module): ...@@ -58,9 +59,6 @@ class Mutable(nn.Module):
"Or did you apply multiple fixed architectures?") "Or did you apply multiple fixed architectures?")
self.__dict__["mutator"] = mutator self.__dict__["mutator"] = mutator
def forward(self, *inputs):
raise NotImplementedError
@property @property
def key(self): def key(self):
""" """
...@@ -86,9 +84,6 @@ class Mutable(nn.Module): ...@@ -86,9 +84,6 @@ class Mutable(nn.Module):
"Or did you initialize a mutable on the fly in forward pass? Move to `__init__` " "Or did you initialize a mutable on the fly in forward pass? Move to `__init__` "
"so that trainer can locate all your mutables. See NNI docs for more details.".format(self)) "so that trainer can locate all your mutables. See NNI docs for more details.".format(self))
def __repr__(self):
return "{} ({})".format(self.name, self.key)
class MutableScope(Mutable): class MutableScope(Mutable):
""" """
...@@ -131,7 +126,7 @@ class LayerChoice(Mutable): ...@@ -131,7 +126,7 @@ class LayerChoice(Mutable):
Parameters Parameters
---------- ----------
op_candidates : list of nn.Module op_candidates : list of nn.Module or OrderedDict
A module list to be selected from. A module list to be selected from.
reduction : str reduction : str
``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected. ``mean``, ``concat``, ``sum`` or ``none``. Policy if multiples are selected.
...@@ -146,23 +141,53 @@ class LayerChoice(Mutable): ...@@ -146,23 +141,53 @@ class LayerChoice(Mutable):
---------- ----------
length : int length : int
Number of ops to choose from. Number of ops to choose from.
names: list of str
Names of candidates.
Notes
-----
``op_candidates`` can be a list of modules or a ordered dict of named modules, for example,
.. code-block:: python
self.op_choice = LayerChoice(OrderedDict([
("conv3x3", nn.Conv2d(3, 16, 128)),
("conv5x5", nn.Conv2d(5, 16, 128)),
("conv7x7", nn.Conv2d(7, 16, 128))
]))
""" """
def __init__(self, op_candidates, reduction="sum", return_mask=False, key=None): def __init__(self, op_candidates, reduction="sum", return_mask=False, key=None):
super().__init__(key=key) super().__init__(key=key)
self.length = len(op_candidates) self.length = len(op_candidates)
self.choices = nn.ModuleList(op_candidates) self.choices = []
self.names = []
if isinstance(op_candidates, OrderedDict):
for name, module in op_candidates.items():
assert name not in ["length", "reduction", "return_mask", "_key", "key", "names"], \
"Please don't use a reserved name '{}' for your module.".format(name)
self.add_module(name, module)
self.choices.append(module)
self.names.append(name)
elif isinstance(op_candidates, list):
for i, module in enumerate(op_candidates):
self.add_module(str(i), module)
self.choices.append(module)
self.names.append(str(i))
else:
raise TypeError("Unsupported op_candidates type: {}".format(type(op_candidates)))
self.reduction = reduction self.reduction = reduction
self.return_mask = return_mask self.return_mask = return_mask
def forward(self, *inputs): def forward(self, *args, **kwargs):
""" """
Returns Returns
------- -------
tuple of tensors tuple of tensors
Output and selection mask. If ``return_mask`` is ``False``, only output is returned. Output and selection mask. If ``return_mask`` is ``False``, only output is returned.
""" """
out, mask = self.mutator.on_forward_layer_choice(self, *inputs) out, mask = self.mutator.on_forward_layer_choice(self, *args, **kwargs)
if self.return_mask: if self.return_mask:
return out, mask return out, mask
return out return out
......
...@@ -128,7 +128,7 @@ class Mutator(BaseMutator): ...@@ -128,7 +128,7 @@ class Mutator(BaseMutator):
result["mutable"][mutable.key].append(path) result["mutable"][mutable.key].append(path)
return result return result
def on_forward_layer_choice(self, mutable, *inputs): def on_forward_layer_choice(self, mutable, *args, **kwargs):
""" """
On default, this method retrieves the decision obtained previously, and select certain operations. On default, this method retrieves the decision obtained previously, and select certain operations.
Only operations with non-zero weight will be executed. The results will be added to a list. Only operations with non-zero weight will be executed. The results will be added to a list.
...@@ -138,7 +138,9 @@ class Mutator(BaseMutator): ...@@ -138,7 +138,9 @@ class Mutator(BaseMutator):
---------- ----------
mutable : LayerChoice mutable : LayerChoice
Layer choice module. Layer choice module.
inputs : list of torch.Tensor args : list of torch.Tensor
Inputs
kwargs : dict
Inputs Inputs
Returns Returns
...@@ -148,16 +150,16 @@ class Mutator(BaseMutator): ...@@ -148,16 +150,16 @@ class Mutator(BaseMutator):
""" """
if self._connect_all: if self._connect_all:
return self._all_connect_tensor_reduction(mutable.reduction, return self._all_connect_tensor_reduction(mutable.reduction,
[op(*inputs) for op in mutable.choices]), \ [op(*args, **kwargs) for op in mutable.choices]), \
torch.ones(mutable.length) torch.ones(mutable.length)
def _map_fn(op, *inputs): def _map_fn(op, args, kwargs):
return op(*inputs) return op(*args, **kwargs)
mask = self._get_decision(mutable) mask = self._get_decision(mutable)
assert len(mask) == len(mutable.choices), \ assert len(mask) == len(mutable.choices), \
"Invalid mask, expected {} to be of length {}.".format(mask, len(mutable.choices)) "Invalid mask, expected {} to be of length {}.".format(mask, len(mutable.choices))
out = self._select_with_mask(_map_fn, [(choice, *inputs) for choice in mutable.choices], mask) out = self._select_with_mask(_map_fn, [(choice, args, kwargs) for choice in mutable.choices], mask)
return self._tensor_reduction(mutable.reduction, out), mask return self._tensor_reduction(mutable.reduction, out), mask
def on_forward_input_choice(self, mutable, tensor_list): def on_forward_input_choice(self, mutable, tensor_list):
......
...@@ -317,7 +317,7 @@ class ProxylessNasMutator(BaseMutator): ...@@ -317,7 +317,7 @@ class ProxylessNasMutator(BaseMutator):
self.mutable_list.append(mutable) self.mutable_list.append(mutable)
mutable.registered_module = MixedOp(mutable) mutable.registered_module = MixedOp(mutable)
def on_forward_layer_choice(self, mutable, *inputs): def on_forward_layer_choice(self, mutable, *args, **kwargs):
""" """
Callback of layer choice forward. This function defines the forward Callback of layer choice forward. This function defines the forward
logic of the input mutable. So mutable is only interface, its real logic of the input mutable. So mutable is only interface, its real
...@@ -327,7 +327,9 @@ class ProxylessNasMutator(BaseMutator): ...@@ -327,7 +327,9 @@ class ProxylessNasMutator(BaseMutator):
---------- ----------
mutable: LayerChoice mutable: LayerChoice
forward logic of this input mutable forward logic of this input mutable
inputs: list of torch.Tensor args: list of torch.Tensor
inputs of this mutable
kwargs: dict
inputs of this mutable inputs of this mutable
Returns Returns
...@@ -339,7 +341,7 @@ class ProxylessNasMutator(BaseMutator): ...@@ -339,7 +341,7 @@ class ProxylessNasMutator(BaseMutator):
""" """
# FIXME: return mask, to be consistent with other algorithms # FIXME: return mask, to be consistent with other algorithms
idx = mutable.registered_module.active_op_index idx = mutable.registered_module.active_op_index
return mutable.registered_module(mutable, *inputs), idx return mutable.registered_module(mutable, *args, **kwargs), idx
def reset_binary_gates(self): def reset_binary_gates(self):
""" """
......
...@@ -5593,7 +5593,7 @@ load-json-file@^4.0.0: ...@@ -5593,7 +5593,7 @@ load-json-file@^4.0.0:
pify "^3.0.0" pify "^3.0.0"
strip-bom "^3.0.0" strip-bom "^3.0.0"
loader-fs-cache@>=1.0.3, loader-fs-cache@^1.0.0: loader-fs-cache@^1.0.0:
version "1.0.3" version "1.0.3"
resolved "https://registry.yarnpkg.com/loader-fs-cache/-/loader-fs-cache-1.0.3.tgz#f08657646d607078be2f0a032f8bd69dd6f277d9" resolved "https://registry.yarnpkg.com/loader-fs-cache/-/loader-fs-cache-1.0.3.tgz#f08657646d607078be2f0a032f8bd69dd6f277d9"
integrity sha512-ldcgZpjNJj71n+2Mf6yetz+c9bM4xpKtNds4LbqXzU/PTdeAX0g3ytnU1AJMEcTk2Lex4Smpe3Q/eCTsvUBxbA== integrity sha512-ldcgZpjNJj71n+2Mf6yetz+c9bM4xpKtNds4LbqXzU/PTdeAX0g3ytnU1AJMEcTk2Lex4Smpe3Q/eCTsvUBxbA==
......
...@@ -77,6 +77,14 @@ testCases: ...@@ -77,6 +77,14 @@ testCases:
kwargs: kwargs:
expected_result_file: expected_metrics.json expected_result_file: expected_metrics.json
- name: export-float
configFile: test/config/metrics_test/config.yml
config:
maxTrialNum: 1
trialConcurrency: 1
validator:
class: ExportValidator
- name: metrics-dict - name: metrics-dict
configFile: test/config/metrics_test/config_dict_metrics.yml configFile: test/config/metrics_test/config_dict_metrics.yml
config: config:
...@@ -87,6 +95,14 @@ testCases: ...@@ -87,6 +95,14 @@ testCases:
kwargs: kwargs:
expected_result_file: expected_metrics_dict.json expected_result_file: expected_metrics_dict.json
- name: export-dict
configFile: test/config/metrics_test/config_dict_metrics.yml
config:
maxTrialNum: 1
trialConcurrency: 1
validator:
class: ExportValidator
- name: nnicli - name: nnicli
configFile: test/config/examples/sklearn-regression.yml configFile: test/config/examples/sklearn-regression.yml
config: config:
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
# Licensed under the MIT license. # Licensed under the MIT license.
import os.path as osp import os.path as osp
from os import remove
import subprocess
import json import json
import requests import requests
import nnicli as nc import nnicli as nc
...@@ -12,6 +14,24 @@ class ITValidator: ...@@ -12,6 +14,24 @@ class ITValidator:
def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
pass pass
class ExportValidator(ITValidator):
def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
exp_id = osp.split(experiment_dir)[-1]
proc1 = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", "csv", "-f", "report.csv"])
assert proc1.returncode == 0, '`nnictl experiment export -t csv` failed with code %d' % proc1.returncode
with open("report.csv", 'r') as f:
print('Exported CSV file: \n')
print(''.join(f.readlines()))
print('\n\n')
remove('report.csv')
proc2 = subprocess.run(["nnictl", "experiment", "export", exp_id, "-t", "json", "-f", "report.json"])
assert proc2.returncode == 0, '`nnictl experiment export -t json` failed with code %d' % proc2.returncode
with open("report.json", 'r') as f:
print('Exported JSON file: \n')
print('\n'.join(f.readlines()))
print('\n\n')
remove('report.json')
class MetricsValidator(ITValidator): class MetricsValidator(ITValidator):
def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs): def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
......
...@@ -70,5 +70,5 @@ jobs: ...@@ -70,5 +70,5 @@ jobs:
python --version python --version
mount -o anon $(pai_nfs_uri) $(local_nfs_uri) mount -o anon $(pai_nfs_uri) $(local_nfs_uri)
python nni_test/nnitest/generate_ts_config.py --ts pai --pai_token $(pai_token) --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $(docker_image) --pai_storage_plugin $(pai_storage_plugin) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip) python nni_test/nnitest/generate_ts_config.py --ts pai --pai_token $(pai_token) --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $(docker_image) --pai_storage_plugin $(pai_storage_plugin) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
displayName: 'Examples and advanced features tests on pai' displayName: 'Examples and advanced features tests on pai'
\ No newline at end of file
...@@ -57,5 +57,5 @@ jobs: ...@@ -57,5 +57,5 @@ jobs:
cd test cd test
python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\ python3 nni_test/nnitest/generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --nni_docker_image $TEST_IMG --pai_storage_plugin $(pai_storage_plugin)\
--pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip) --pai_token $(pai_token) --nni_manager_nfs_mount_path $(nni_manager_nfs_mount_path) --container_nfs_mount_path $(container_nfs_mount_path) --nni_manager_ip $(nni_manager_ip)
PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai --exclude multi-phase PATH=$HOME/.local/bin:$PATH python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
displayName: 'integration test' displayName: 'integration test'
...@@ -699,12 +699,13 @@ def export_trials_data(args): ...@@ -699,12 +699,13 @@ def export_trials_data(args):
content = json.loads(response.text) content = json.loads(response.text)
trial_records = [] trial_records = []
for record in content: for record in content:
if not isinstance(record['value'], (float, int)): record_value = json.loads(record['value'])
formated_record = {**record['parameter'], **record['value'], **{'id': record['id']}} if not isinstance(record_value, (float, int)):
formated_record = {**record['parameter'], **record_value, **{'id': record['id']}}
else: else:
formated_record = {**record['parameter'], **{'reward': record['value'], 'id': record['id']}} formated_record = {**record['parameter'], **{'reward': record_value, 'id': record['id']}}
trial_records.append(formated_record) trial_records.append(formated_record)
with open(args.path, 'w') as file: with open(args.path, 'w', newline='') as file:
writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records])) writer = csv.DictWriter(file, set.union(*[set(r.keys()) for r in trial_records]))
writer.writeheader() writer.writeheader()
writer.writerows(trial_records) writer.writerows(trial_records)
......
...@@ -10,27 +10,31 @@ import traceback ...@@ -10,27 +10,31 @@ import traceback
from xml.dom import minidom from xml.dom import minidom
def check_ready_to_run(): def check_ready_to_run():
if sys.platform == 'win32': if sys.platform == 'win32':
pgrep_output = subprocess.check_output( pgrep_output = subprocess.check_output(
'wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\' and name like \'%python%\'" get processId') 'wmic process where "CommandLine like \'%nni_gpu_tool.gpu_metrics_collector%\' and name like \'%python%\'" get processId')
pidList = pgrep_output.decode("utf-8").strip().split() pidList = pgrep_output.decode("utf-8").strip().split()
pidList.pop(0) # remove the key word 'ProcessId' pidList.pop(0) # remove the key word 'ProcessId'
pidList = list(map(int, pidList)) pidList = list(map(int, pidList))
pidList.remove(os.getpid()) pidList.remove(os.getpid())
return not pidList return not pidList
else: else:
pgrep_output = subprocess.check_output('pgrep -fxu "$(whoami)" \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True) pgrep_output = subprocess.check_output('pgrep -afu "$(whoami)" \'python3 -m nni_gpu_tool.gpu_metrics_collector\'', shell=True)
pidList = [] pidList = []
for pid in pgrep_output.splitlines(): for pid in pgrep_output.splitlines():
pidList.append(int(pid)) pid = pid.decode()
pidList.remove(os.getpid()) if "pgrep " in pid or pid.startswith('%s ' % os.getpid()) or pid.startswith('%s ' % os.getppid()):
continue
pidList.append(pid)
return not pidList return not pidList
def main(argv): def main(argv):
metrics_output_dir = os.environ['METRIC_OUTPUT_DIR'] metrics_output_dir = os.environ['METRIC_OUTPUT_DIR']
if check_ready_to_run() == False: if check_ready_to_run() == False:
# GPU metrics collector is already running. Exit print("GPU metrics collector is already running. exiting...")
exit(2) exit(2)
cmd = 'nvidia-smi -q -x'.split() cmd = 'nvidia-smi -q -x'.split()
while(True): while(True):
...@@ -44,6 +48,7 @@ def main(argv): ...@@ -44,6 +48,7 @@ def main(argv):
# TODO: change to sleep time configurable via arguments # TODO: change to sleep time configurable via arguments
time.sleep(5) time.sleep(5)
def parse_nvidia_smi_result(smi, outputDir): def parse_nvidia_smi_result(smi, outputDir):
try: try:
old_umask = os.umask(0) old_umask = os.umask(0)
...@@ -70,13 +75,14 @@ def parse_nvidia_smi_result(smi, outputDir): ...@@ -70,13 +75,14 @@ def parse_nvidia_smi_result(smi, outputDir):
outPut["gpuInfos"].append(gpuInfo) outPut["gpuInfos"].append(gpuInfo)
print(outPut) print(outPut)
outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True))) outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True)))
outputFile.flush(); outputFile.flush()
except: except Exception as error:
# e_info = sys.exc_info() # e_info = sys.exc_info()
print('xmldoc paring error') print('gpu_metrics_collector error: %s' % error)
finally: finally:
os.umask(old_umask) os.umask(old_umask)
def gen_empty_gpu_metric(outputDir): def gen_empty_gpu_metric(outputDir):
try: try:
old_umask = os.umask(0) old_umask = os.umask(0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment