Unverified Commit 80bc9537 authored by Ni Hao's avatar Ni Hao Committed by GitHub
Browse files

add reusable for win remote (#3500)

parent 76f39903
...@@ -12,3 +12,10 @@ else ...@@ -12,3 +12,10 @@ else
# Install nni # Install nni
python3 -m pip install --user --upgrade nni python3 -m pip install --user --upgrade nni
fi`; fi`;
/**
 * PowerShell script that installs NNI on a Windows machine when it is missing.
 *
 * The script probes for the package with `python -c "import nni"` and only
 * runs `pip install` when that probe exits with a non-zero status.
 *
 * Why `$LASTEXITCODE`: `$error` is PowerShell's automatic list of error
 * records, and `2>$error` treats it as a file path rather than capturing
 * stderr into it, so it cannot be used to tell whether the import worked.
 * The native exit code is the reliable signal; stderr is discarded via `$null`.
 */
export const CONTAINER_INSTALL_NNI_SHELL_FORMAT_FOR_WIN: string =
`python -c "import nni" 2>$null
if ($LASTEXITCODE -ne 0) {
    python -m pip install --user --upgrade nni
}
exit`;
\ No newline at end of file
...@@ -30,10 +30,11 @@ class ShellExecutor { ...@@ -30,10 +30,11 @@ class ShellExecutor {
private readonly sshClient: Client; private readonly sshClient: Client;
private readonly log: Logger; private readonly log: Logger;
private tempPath: string = ""; private tempPath: string = "";
private isWindows: boolean = false;
private channelDefaultOutputs: string[] = []; private channelDefaultOutputs: string[] = [];
private pythonPath: string | undefined; private pythonPath: string | undefined;
public isWindows: boolean = false;
constructor() { constructor() {
this.log = getLogger(); this.log = getLogger();
this.sshClient = new Client(); this.sshClient = new Client();
......
...@@ -10,7 +10,7 @@ import { getExperimentId } from '../../../common/experimentStartupInfo'; ...@@ -10,7 +10,7 @@ import { getExperimentId } from '../../../common/experimentStartupInfo';
import { getLogger, Logger } from '../../../common/log'; import { getLogger, Logger } from '../../../common/log';
import { EnvironmentInformation, EnvironmentService } from '../environment'; import { EnvironmentInformation, EnvironmentService } from '../environment';
import { import {
getExperimentRootDir, getExperimentRootDir, getLogLevel
} from '../../../common/utils'; } from '../../../common/utils';
import { TrialConfig } from '../../common/trialConfig'; import { TrialConfig } from '../../common/trialConfig';
import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../../common/trialConfigMetadataKey';
...@@ -218,6 +218,30 @@ export class RemoteEnvironmentService extends EnvironmentService { ...@@ -218,6 +218,30 @@ export class RemoteEnvironmentService extends EnvironmentService {
this.remoteMachineMetaOccupiedMap.set(remoteEnvironment.rmMachineMeta, false); this.remoteMachineMetaOccupiedMap.set(remoteEnvironment.rmMachineMeta, false);
} }
/**
 * Builds the command line that launches the trial runner for `environment`
 * on its remote machine.
 *
 * Side effect: sets `environment.runnerWorkingFolder` to
 * `<remoteExperimentRootDir>/envs/<environment.id>` (joined by the executor).
 *
 * The resulting command:
 *   1. changes into the remote experiment root directory,
 *   2. runs the start command with `--job_pid_file` pointing into the runner
 *      working folder, redirecting stdout/stderr to `trialrunner_stdout` and
 *      `trialrunner_stderr` there,
 *   3. on success of the previous step, writes `$?` plus a millisecond
 *      timestamp to the `code` file in the runner working folder.
 *
 * On a Windows executor, `environment.command` is replaced by a cmd/PowerShell
 * sequence that creates and enters `envs\<id>`, runs `install_nni.ps1` and
 * starts `nni.tools.trial_tool.trial_runner`; in debug log level it also
 * unpacks `nni_trial_tool.tar.gz` if needed and installs `websockets`.
 *
 * @param environment environment whose start script is generated
 * @returns the full command line as a single string
 */
private async getScript(environment: EnvironmentInformation): Promise<string> {
    const executor = await this.getExecutor(environment.id);
    environment.runnerWorkingFolder = executor.joinPath(this.remoteExperimentRootDir, 'envs', environment.id);
    const workingFolder = environment.runnerWorkingFolder;

    let script: string = environment.command;
    // Default (non-Windows) epilogue: record exit status and epoch milliseconds.
    let codeScript = `echo $? \`date +%s%3N\` >${workingFolder}/code`;

    if (executor.isWindows) {
        // Only the Windows branch distinguishes debug from normal start-up.
        const isDebug = getLogLevel() === "debug";
        const prepare = `mkdir envs\\${environment.id} 2>NUL & cd envs\\${environment.id}`;
        const startrun = `powershell ..\\install_nni.ps1 && python -m nni.tools.trial_tool.trial_runner`;
        const developingScript = "IF EXIST nni_trial_tool (ECHO \"nni_trial_tool exists already\") ELSE (mkdir nni_trial_tool && tar -xof ../nni_trial_tool.tar.gz -C ./nni_trial_tool) && pip3 install websockets";
        script = isDebug ? `${prepare} && ${developingScript} && ${startrun}` : `${prepare} && ${startrun}`;
        // The path lives on the remote machine, so it is joined by the executor
        // (as runnerWorkingFolder is above) rather than by the local host's path module.
        const codeFile = executor.joinPath(workingFolder, 'code');
        codeScript = `powershell -command "Write $? " " (((New-TimeSpan -Start (Get-Date "01/01/1970") -End (Get-Date).ToUniversalTime()).TotalMilliseconds).ToString("0")) | Out-file ${codeFile} -Append -NoNewline -encoding utf8"`;
    }

    // Pieces are joined with single spaces into one command line.
    return [
        `cd ${this.remoteExperimentRootDir} &&`,
        `${script} --job_pid_file ${workingFolder}/pid`,
        `1>${workingFolder}/trialrunner_stdout 2>${workingFolder}/trialrunner_stderr`,
        `&& ${codeScript}`,
    ].join(' ');
}
public async startEnvironment(environment: EnvironmentInformation): Promise<void> { public async startEnvironment(environment: EnvironmentInformation): Promise<void> {
if (this.sshConnectionPromises.length > 0) { if (this.sshConnectionPromises.length > 0) {
await Promise.all(this.sshConnectionPromises); await Promise.all(this.sshConnectionPromises);
...@@ -268,11 +292,8 @@ export class RemoteEnvironmentService extends EnvironmentService { ...@@ -268,11 +292,8 @@ export class RemoteEnvironmentService extends EnvironmentService {
} else { } else {
this.remoteExperimentRootDir = executor.getRemoteExperimentRootDir(getExperimentId()); this.remoteExperimentRootDir = executor.getRemoteExperimentRootDir(getExperimentId());
} }
environment.runnerWorkingFolder = executor.joinPath(this.remoteExperimentRootDir, 'envs', environment.id);
environment.command = `cd ${this.remoteExperimentRootDir} && \ environment.command = await this.getScript(environment);
${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \
1>${environment.runnerWorkingFolder}/trialrunner_stdout 2>${environment.runnerWorkingFolder}/trialrunner_stderr \
&& echo $? \`date +%s%3N\` >${environment.runnerWorkingFolder}/code`;
return Promise.resolve(true); return Promise.resolve(true);
} }
} }
...@@ -291,7 +312,7 @@ export class RemoteEnvironmentService extends EnvironmentService { ...@@ -291,7 +312,7 @@ export class RemoteEnvironmentService extends EnvironmentService {
// Copy files in codeDir to remote working directory // Copy files in codeDir to remote working directory
await executor.copyDirectoryToRemote(environmentLocalTempFolder, this.remoteExperimentRootDir); await executor.copyDirectoryToRemote(environmentLocalTempFolder, this.remoteExperimentRootDir);
// Execute command in remote machine, set isInteractive=true to run script in conda environment // Execute command in remote machine, set isInteractive=true to run script in conda environment
executor.executeScript(executor.joinPath(environment.runnerWorkingFolder, executor.executeScript(executor.joinPath(this.remoteExperimentRootDir,
executor.getScriptName("run")), true, true); executor.getScriptName("run")), true, true);
if (environment.rmMachineMeta === undefined) { if (environment.rmMachineMeta === undefined) {
throw new Error(`${environment.id} rmMachineMeta not initialized!`); throw new Error(`${environment.id} rmMachineMeta not initialized!`);
......
...@@ -18,6 +18,7 @@ import { delay, getExperimentRootDir, getIPV4Address, getLogLevel, getVersion, m ...@@ -18,6 +18,7 @@ import { delay, getExperimentRootDir, getIPV4Address, getLogLevel, getVersion, m
import { GPU_INFO, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, REPORT_METRIC_DATA, SEND_TRIAL_JOB_PARAMETER, STDOUT, TRIAL_END, VERSION_CHECK } from '../../core/commands'; import { GPU_INFO, INITIALIZED, KILL_TRIAL_JOB, NEW_TRIAL_JOB, REPORT_METRIC_DATA, SEND_TRIAL_JOB_PARAMETER, STDOUT, TRIAL_END, VERSION_CHECK } from '../../core/commands';
import { ScheduleResultType } from '../../training_service/common/gpuData'; import { ScheduleResultType } from '../../training_service/common/gpuData';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData'; import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT_FOR_WIN } from '../common/containerJobData';
import { TrialConfig } from '../common/trialConfig'; import { TrialConfig } from '../common/trialConfig';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey'; import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { validateCodeDir } from '../common/util'; import { validateCodeDir } from '../common/util';
...@@ -32,7 +33,6 @@ import { NFSSharedStorageService } from './shared_storages/nfsStorageService' ...@@ -32,7 +33,6 @@ import { NFSSharedStorageService } from './shared_storages/nfsStorageService'
import { AzureBlobSharedStorageService } from './shared_storages/azureblobStorageService' import { AzureBlobSharedStorageService } from './shared_storages/azureblobStorageService'
import { TrialDetail } from './trial'; import { TrialDetail } from './trial';
/** /**
* It uses to manage jobs on training platforms * It uses to manage jobs on training platforms
* and expose trial as trial job to upper level. * and expose trial as trial job to upper level.
...@@ -225,8 +225,10 @@ class TrialDispatcher implements TrainingService { ...@@ -225,8 +225,10 @@ class TrialDispatcher implements TrainingService {
const codeFileName = await storageService.copyDirectory(codeDir, envDir, true); const codeFileName = await storageService.copyDirectory(codeDir, envDir, true);
storageService.rename(codeFileName, "nni-code.tar.gz"); storageService.rename(codeFileName, "nni-code.tar.gz");
const installFileName = storageService.joinPath(envDir, 'install_nni.sh'); const installFileName = storageService.joinPath(envDir, `install_nni.sh`);
const installFileNameForWin = storageService.joinPath(envDir, `install_nni.ps1`);
await storageService.save(CONTAINER_INSTALL_NNI_SHELL_FORMAT, installFileName); await storageService.save(CONTAINER_INSTALL_NNI_SHELL_FORMAT, installFileName);
await storageService.save(CONTAINER_INSTALL_NNI_SHELL_FORMAT_FOR_WIN, installFileNameForWin);
const runnerSettingsConfig = storageService.joinPath(envDir, "settings.json"); const runnerSettingsConfig = storageService.joinPath(envDir, "settings.json");
await storageService.save(JSON.stringify(runnerSettings), runnerSettingsConfig); await storageService.save(JSON.stringify(runnerSettings), runnerSettingsConfig);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment