Unverified commit 21a1dd8b, authored by J-shang, committed by GitHub
Browse files

Fix AML outputs and Python process not being killed (#3321)

* fix outputs and python process not killed

* change cleanup stop environment logic

* fix bug
parent a0aa12f9
......@@ -114,7 +114,7 @@ export class AMLEnvironmentService extends EnvironmentService {
}
const amlEnvironment: AMLEnvironmentInformation = environment as AMLEnvironmentInformation;
const environmentLocalTempFolder = path.join(this.experimentRootDir, this.experimentId, "environment-temp");
environment.command = `import os\nos.system('${amlEnvironment.command}')`;
environment.command = `import os\nos.system('mv envs outputs/envs && cd outputs && ${amlEnvironment.command}')`;
environment.useActiveGpu = this.amlClusterConfig.useActiveGpu;
environment.maxTrialNumberPerGpu = this.amlClusterConfig.maxTrialNumPerGpu;
await fs.promises.writeFile(path.join(environmentLocalTempFolder, 'nni_script.py'), amlEnvironment.command, { encoding: 'utf8' });
......
......@@ -111,6 +111,10 @@ ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \
}
public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
if (environment.isAlive === false) {
return Promise.resolve();
}
const jobpidPath: string = `${environment.runnerWorkingFolder}/pid`;
const pid: string = await fs.promises.readFile(jobpidPath, 'utf8');
tkill(Number(pid), 'SIGKILL');
......
......@@ -219,6 +219,9 @@ export class OpenPaiEnvironmentService extends EnvironmentService {
public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
const deferred: Deferred<void> = new Deferred<void>();
if (environment.isAlive === false) {
return Promise.resolve();
}
if (this.paiClusterConfig === undefined) {
return Promise.reject(new Error('PAI Cluster config is not initialized'));
}
......
......@@ -289,6 +289,10 @@ ${environment.command} --job_pid_file ${environment.runnerWorkingFolder}/pid \
}
public async stopEnvironment(environment: EnvironmentInformation): Promise<void> {
if (environment.isAlive === false) {
return Promise.resolve();
}
const executor = await this.getExecutor(environment.id);
if (environment.status === 'UNKNOWN') {
......
......@@ -310,7 +310,6 @@ class TrialDispatcher implements TrainingService {
for (let index = 0; index < environments.length; index++) {
const environment = environments[index];
if (environment.isAlive === true) {
this.log.info(`stopping environment ${environment.id}...`);
if (environment.environmentService === undefined) {
throw new Error(`${environment.id} do not have environmentService!`);
......@@ -318,7 +317,6 @@ class TrialDispatcher implements TrainingService {
await environment.environmentService.stopEnvironment(environment);
this.log.info(`stopped environment ${environment.id}.`);
}
}
this.commandEmitter.off("command", this.handleCommand);
for (const commandChannel of this.commandChannelSet) {
await commandChannel.stop();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment