Unverified Commit f892ed67 authored by J-shang's avatar J-shang Committed by GitHub
Browse files

Add preCommand option to support configuring experimental environment by user (#2875)

parent 36042435
...@@ -107,3 +107,79 @@ Files in `codeDir` will be uploaded to remote machines automatically. You can ru ...@@ -107,3 +107,79 @@ Files in `codeDir` will be uploaded to remote machines automatically. You can ru
```bash ```bash
nnictl create --config examples/trials/mnist-annotation/config_remote.yml nnictl create --config examples/trials/mnist-annotation/config_remote.yml
``` ```
### Configure python environment
By default, commands and scripts are executed in the default environment on the remote machine. If there are multiple Python virtual environments on your remote machine and you want to run experiments in a specific one, use __preCommand__ to specify a Python environment on your remote machine.
Take `examples/trials/mnist-tfv2` as an example. Below is the content of `examples/trials/mnist-tfv2/config_remote.yml`:
```yaml
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: remote
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
#machineList can be empty if the platform is local
machineList:
- ip: ${replace_to_your_remote_machine_ip}
username: ${replace_to_your_remote_machine_username}
sshKeyPath: ${replace_to_your_remote_machine_sshKeyPath}
# Pre-command will be executed before the remote machine executes other commands.
# Below is an example of specifying python environment.
# If you want to execute multiple commands, please use "&&" to connect them.
# preCommand: source ${replace_to_absolute_path_recommended_here}/bin/activate
# preCommand: source ${replace_to_conda_path}/bin/activate ${replace_to_conda_env_name}
preCommand: export PATH=${replace_to_python_environment_path_in_your_remote_machine}:$PATH
```
The __preCommand__ will be executed before the remote machine executes other commands, so you can configure the Python environment path like this:
```yaml
# Linux remote machine
preCommand: export PATH=${replace_to_python_environment_path_in_your_remote_machine}:$PATH
# Windows remote machine
preCommand: set path=${replace_to_python_environment_path_in_your_remote_machine};%path%
```
Or if you want to activate the `virtualenv` environment:
```yaml
# Linux remote machine
preCommand: source ${replace_to_absolute_path_recommended_here}/bin/activate
# Windows remote machine
preCommand: ${replace_to_absolute_path_recommended_here}\\scripts\\activate
```
Or if you want to activate the `conda` environment:
```yaml
# Linux remote machine
preCommand: source ${replace_to_conda_path}/bin/activate ${replace_to_conda_env_name}
# Windows remote machine
preCommand: call activate ${replace_to_conda_env_name}
```
If you want multiple commands to be executed, you can use `&&` to connect these commands:
```yaml
preCommand: command1 && command2 && command3
```
__Note__: Because __preCommand__ is executed before every other command, each time, it is strongly recommended not to set a __preCommand__ that makes changes to the system, e.g. `mkdir` or `touch`.
...@@ -58,6 +58,7 @@ This document describes the rules to write the config file, and provides some ex ...@@ -58,6 +58,7 @@ This document describes the rules to write the config file, and provides some ex
- [gpuIndices](#gpuindices-3) - [gpuIndices](#gpuindices-3)
- [maxTrialNumPerGpu](#maxtrialnumpergpu-1) - [maxTrialNumPerGpu](#maxtrialnumpergpu-1)
- [useActiveGpu](#useactivegpu-1) - [useActiveGpu](#useactivegpu-1)
- [preCommand](#precommand)
+ [kubeflowConfig](#kubeflowconfig) + [kubeflowConfig](#kubeflowconfig)
- [operator](#operator) - [operator](#operator)
- [storage](#storage) - [storage](#storage)
...@@ -583,6 +584,14 @@ Optional. Bool. Default: false. ...@@ -583,6 +584,14 @@ Optional. Bool. Default: false.
Used to specify whether to use a GPU if there is another process. By default, NNI will use the GPU only if there is no other active process in the GPU. If __useActiveGpu__ is set to true, NNI will use the GPU regardless of another processes. This field is not applicable for NNI on Windows. Used to specify whether to use a GPU if there is another process. By default, NNI will use the GPU only if there is no other active process in the GPU. If __useActiveGpu__ is set to true, NNI will use the GPU regardless of another processes. This field is not applicable for NNI on Windows.
#### preCommand
Optional. String.
Specifies the pre-command that will be executed before the remote machine executes other commands. Users can configure the experimental environment on the remote machine by setting __preCommand__. If multiple commands need to be executed, use `&&` to connect them, such as `preCommand: command1 && command2 && ...`.
__Note__: Because __preCommand__ is executed before every other command, each time, it is strongly recommended not to set a __preCommand__ that makes changes to the system, e.g. `mkdir` or `touch`.
### kubeflowConfig ### kubeflowConfig
#### operator #### operator
...@@ -795,6 +804,12 @@ If run trial jobs in remote machine, users could specify the remote machine info ...@@ -795,6 +804,12 @@ If run trial jobs in remote machine, users could specify the remote machine info
username: test username: test
sshKeyPath: /nni/sshkey sshKeyPath: /nni/sshkey
passphrase: qwert passphrase: qwert
# Pre-command will be executed before the remote machine executes other commands.
# Below is an example of specifying python environment.
# If you want to execute multiple commands, please use "&&" to connect them.
# preCommand: source ${replace_to_absolute_path_recommended_here}/bin/activate
# preCommand: source ${replace_to_conda_path}/bin/activate ${replace_to_conda_env_name}
preCommand: export PATH=${replace_to_python_environment_path_in_your_remote_machine}:$PATH
``` ```
### PAI mode ### PAI mode
......
authorName: default
experimentName: example_mnist
trialConcurrency: 1
maxExecDuration: 1h
maxTrialNum: 10
#choice: local, remote, pai
trainingServicePlatform: remote
searchSpacePath: search_space.json
#choice: true, false
useAnnotation: false
tuner:
#choice: TPE, Random, Anneal, Evolution, BatchTuner, MetisTuner
#SMAC (SMAC should be installed through nnictl)
builtinTunerName: TPE
classArgs:
#choice: maximize, minimize
optimize_mode: maximize
trial:
command: python3 mnist.py
codeDir: .
gpuNum: 0
#machineList can be empty if the platform is local
machineList:
- ip: ${replace_to_your_remote_machine_ip}
username: ${replace_to_your_remote_machine_username}
sshKeyPath: ${replace_to_your_remote_machine_sshKeyPath}
# Pre-command will be executed before the remote machine executes other commands.
# Below is an example of specifying python environment.
# If you want to execute multiple commands, please use "&&" to connect them.
# preCommand: source ${replace_to_absolute_path_recommended_here}/bin/activate
# preCommand: source ${replace_to_conda_path}/bin/activate ${replace_to_conda_env_name}
preCommand: export PATH=${replace_to_python_environment_path_in_your_remote_machine}:$PATH
...@@ -17,7 +17,8 @@ export namespace ValidationSchemas { ...@@ -17,7 +17,8 @@ export namespace ValidationSchemas {
passphrase: joi.string(), passphrase: joi.string(),
gpuIndices: joi.string(), gpuIndices: joi.string(),
maxTrialNumPerGpu: joi.number(), maxTrialNumPerGpu: joi.number(),
useActiveGpu: joi.boolean() useActiveGpu: joi.boolean(),
preCommand: joi.string()
})), })),
local_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase local_config: joi.object({ // eslint-disable-line @typescript-eslint/camelcase
gpuIndices: joi.string(), gpuIndices: joi.string(),
......
...@@ -123,11 +123,19 @@ class LinuxCommands extends OsCommands { ...@@ -123,11 +123,19 @@ class LinuxCommands extends OsCommands {
if (isFile) { if (isFile) {
command = `bash '${script}'`; command = `bash '${script}'`;
} else { } else {
script = script.replace('"', '\\"'); script = script.replace(/"/g, '\\"');
command = `bash -c "${script}"`; command = `bash -c "${script}"`;
} }
return command; return command;
} }
/**
 * Prepends the user-configured pre-command to a shell command.
 *
 * @param preCommand command to run first; may be undefined or empty
 * @param command the command that should run after `preCommand`
 * @returns `preCommand` and `command` chained with `&&` when both are
 *          non-empty strings; otherwise `command` is returned unchanged
 *          (including when it is undefined or empty)
 */
public addPreCommand(preCommand: string | undefined, command: string | undefined): string | undefined {
    const hasCommand: boolean = command !== undefined && command !== '';
    const hasPreCommand: boolean = preCommand !== undefined && preCommand !== '';

    // Chain only when there is something on both sides of the `&&`.
    if (hasCommand && hasPreCommand) {
        return `${preCommand} && ${command}`;
    }
    return command;
}
} }
export { LinuxCommands }; export { LinuxCommands };
...@@ -46,7 +46,7 @@ class WindowsCommands extends OsCommands { ...@@ -46,7 +46,7 @@ class WindowsCommands extends OsCommands {
} }
public generateGpuStatsScript(scriptFolder: string): string { public generateGpuStatsScript(scriptFolder: string): string {
return `powershell -command $env:METRIC_OUTPUT_DIR='${scriptFolder}';$app = Start-Process -FilePath python -NoNewWindow -passthru -ArgumentList '-m nni_gpu_tool.gpu_metrics_collector' -RedirectStandardOutput ${scriptFolder}\\scriptstdout -RedirectStandardError ${scriptFolder}\\scriptstderr;Write $PID ^| Out-File ${scriptFolder}\\pid -NoNewline -encoding utf8;wait-process $app.ID`; return `powershell -command $env:Path=If($env:prePath){$env:prePath}Else{$env:Path};$env:METRIC_OUTPUT_DIR='${scriptFolder}';$app = Start-Process -FilePath python -NoNewWindow -passthru -ArgumentList '-m nni_gpu_tool.gpu_metrics_collector' -RedirectStandardOutput ${scriptFolder}\\scriptstdout -RedirectStandardError ${scriptFolder}\\scriptstderr;Write $PID ^| Out-File ${scriptFolder}\\pid -NoNewline -encoding utf8;wait-process $app.ID`;
} }
public createFolder(folderName: string, sharedFolder: boolean = false): string { public createFolder(folderName: string, sharedFolder: boolean = false): string {
...@@ -122,6 +122,14 @@ class WindowsCommands extends OsCommands { ...@@ -122,6 +122,14 @@ class WindowsCommands extends OsCommands {
const command = `${script}`; const command = `${script}`;
return command; return command;
} }
/**
 * Prepends the user-configured pre-command to a Windows shell command.
 *
 * Between the two commands, `set prePath=%path%` stores `%path%` in the
 * `prePath` variable; the GPU stats script built by this class reads
 * `$env:prePath` when it is set.
 *
 * @param preCommand command to run first; may be undefined or empty
 * @param command the command that should run after `preCommand`
 * @returns the chained command line when both arguments are non-empty
 *          strings; otherwise `command` is returned unchanged (including
 *          when it is undefined or empty)
 */
public addPreCommand(preCommand: string | undefined, command: string | undefined): string | undefined {
    const hasCommand: boolean = command !== undefined && command !== '';
    const hasPreCommand: boolean = preCommand !== undefined && preCommand !== '';

    // Chain only when there is something on both sides of the `&&`.
    if (hasCommand && hasPreCommand) {
        return `${preCommand} && set prePath=%path% && ${command}`;
    }
    return command;
}
} }
export { WindowsCommands }; export { WindowsCommands };
...@@ -28,6 +28,7 @@ abstract class OsCommands { ...@@ -28,6 +28,7 @@ abstract class OsCommands {
public abstract killChildProcesses(pidFileName: string, killSelf: boolean): string; public abstract killChildProcesses(pidFileName: string, killSelf: boolean): string;
public abstract extractFile(tarFileName: string, targetFolder: string): string; public abstract extractFile(tarFileName: string, targetFolder: string): string;
public abstract executeScript(script: string, isFile: boolean): string; public abstract executeScript(script: string, isFile: boolean): string;
public abstract addPreCommand(preCommand: string | undefined, command: string | undefined): string | undefined;
public joinPath(...paths: string[]): string { public joinPath(...paths: string[]): string {
let dir: string = paths.filter((path: any) => path !== '').join(this.pathSpliter); let dir: string = paths.filter((path: any) => path !== '').join(this.pathSpliter);
......
...@@ -23,6 +23,7 @@ export class RemoteMachineMeta { ...@@ -23,6 +23,7 @@ export class RemoteMachineMeta {
//TODO: initialize varialbe in constructor //TODO: initialize varialbe in constructor
public occupiedGpuIndexMap?: Map<number, number>; public occupiedGpuIndexMap?: Map<number, number>;
public readonly useActiveGpu?: boolean = false; public readonly useActiveGpu?: boolean = false;
public readonly preCommand?: string;
} }
/** /**
......
...@@ -32,6 +32,7 @@ class ShellExecutor { ...@@ -32,6 +32,7 @@ class ShellExecutor {
private tempPath: string = ""; private tempPath: string = "";
private isWindows: boolean = false; private isWindows: boolean = false;
private channelDefaultOutputs: string[] = []; private channelDefaultOutputs: string[] = [];
private preCommand: string | undefined;
constructor() { constructor() {
this.log = getLogger(); this.log = getLogger();
...@@ -47,6 +48,7 @@ class ShellExecutor { ...@@ -47,6 +48,7 @@ class ShellExecutor {
username: rmMeta.username, username: rmMeta.username,
tryKeyboard: true, tryKeyboard: true,
}; };
this.preCommand = rmMeta.preCommand;
this.name = `${rmMeta.username}@${rmMeta.ip}:${rmMeta.port}`; this.name = `${rmMeta.username}@${rmMeta.ip}:${rmMeta.port}`;
if (rmMeta.passwd !== undefined) { if (rmMeta.passwd !== undefined) {
connectConfig.password = rmMeta.passwd; connectConfig.password = rmMeta.passwd;
...@@ -349,6 +351,9 @@ class ShellExecutor { ...@@ -349,6 +351,9 @@ class ShellExecutor {
let exitCode: number; let exitCode: number;
const commandIndex = randomInt(10000); const commandIndex = randomInt(10000);
if(this.osCommands !== undefined){
command = this.osCommands.addPreCommand(this.preCommand, command);
}
this.log.debug(`remoteExeCommand(${commandIndex}): [${command}]`); this.log.debug(`remoteExeCommand(${commandIndex}): [${command}]`);
// Windows always uses shell, and it needs to disable to get it works. // Windows always uses shell, and it needs to disable to get it works.
......
...@@ -36,6 +36,7 @@ async function getRemoteFileContentLoop(executor: ShellExecutor): Promise<void> ...@@ -36,6 +36,7 @@ async function getRemoteFileContentLoop(executor: ShellExecutor): Promise<void>
describe('ShellExecutor test', () => { describe('ShellExecutor test', () => {
let skip: boolean = false; let skip: boolean = false;
let isWindows: boolean;
let rmMeta: any; let rmMeta: any;
try { try {
rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8')); rmMeta = JSON.parse(fs.readFileSync('../../.vscode/rminfo.json', 'utf8'));
...@@ -86,4 +87,28 @@ describe('ShellExecutor test', () => { ...@@ -86,4 +87,28 @@ describe('ShellExecutor test', () => {
await getRemoteFileContentLoop(executor); await getRemoteFileContentLoop(executor);
await executor.close(); await executor.close();
}); });
// Probe step: runs `ver` on the remote machine and records in `isWindows`
// whether it succeeded and mentioned "Windows". It asserts nothing itself;
// 'Test preCommand-2' reads `isWindows` to pick OS-specific commands.
it('Test preCommand-1', async () => {
    if (skip) {
        return;
    }
    const executor: ShellExecutor = new ShellExecutor();
    await executor.initialize(rmMeta);
    try {
        const result = await executor.executeScript("ver", false, false);
        isWindows = result.exitCode === 0 && result.stdout.indexOf("Windows") !== -1;
    } finally {
        // Release the connection even when the probe command throws.
        await executor.close();
    }
});
// Verifies that the configured preCommand runs before the script: the
// preCommand sets TEST_PRE_COMMAND and the script prints it back.
// Relies on `isWindows` having been set by 'Test preCommand-1'; if that test
// did not run, `isWindows` is undefined and the Linux commands are used.
it('Test preCommand-2', async () => {
    if (skip) {
        return;
    }
    const executor: ShellExecutor = new ShellExecutor();
    const preCommand = isWindows ? "set TEST_PRE_COMMAND=test_pre_command" : "export TEST_PRE_COMMAND=test_pre_command";
    // Pass a copy so the shared rmMeta fixture is not modified for other tests.
    await executor.initialize(Object.assign({}, rmMeta, { preCommand }));
    try {
        const python = isWindows ? "python" : "python3";
        const command = `${python} -c "import os; print(os.environ.get('TEST_PRE_COMMAND'))"`;
        // Strip spaces, '+', CR and LF from stdout before comparing.
        const result = (await executor.executeScript(command, false, false)).stdout.replace(/[\ +\r\n]/g, "");
        chai.expect(result).eq("test_pre_command");
    } finally {
        // Release the connection even when the script or the assertion throws.
        await executor.close();
    }
});
}); });
...@@ -25,8 +25,8 @@ describe('Unit Test for RemoteMachineTrainingService', () => { ...@@ -25,8 +25,8 @@ describe('Unit Test for RemoteMachineTrainingService', () => {
Default/.vscode/rminfo.json, whose content looks like: Default/.vscode/rminfo.json, whose content looks like:
{ {
"ip": "10.172.121.40", "ip": "10.172.121.40",
"user": "user1", "username": "user1",
"password": "mypassword" "passwd": "mypassword"
} }
*/ */
let skip: boolean = false; let skip: boolean = false;
......
...@@ -382,7 +382,8 @@ machine_list_schema = { ...@@ -382,7 +382,8 @@ machine_list_schema = {
Optional('passphrase'): setType('passphrase', str), Optional('passphrase'): setType('passphrase', str),
Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
Optional('useActiveGpu'): setType('useActiveGpu', bool) Optional('useActiveGpu'): setType('useActiveGpu', bool),
Optional('preCommand'): setType('preCommand', str)
}, },
{ {
'ip': setType('ip', str), 'ip': setType('ip', str),
...@@ -391,7 +392,8 @@ machine_list_schema = { ...@@ -391,7 +392,8 @@ machine_list_schema = {
'passwd': setType('passwd', str), 'passwd': setType('passwd', str),
Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'), Optional('gpuIndices'): Or(int, And(str, lambda x: len([int(i) for i in x.split(',')]) > 0), error='gpuIndex format error!'),
Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int), Optional('maxTrialNumPerGpu'): setType('maxTrialNumPerGpu', int),
Optional('useActiveGpu'): setType('useActiveGpu', bool) Optional('useActiveGpu'): setType('useActiveGpu', bool),
Optional('preCommand'): setType('preCommand', str)
})] })]
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment