Unverified Commit 5bd994de authored by chicm-ms's avatar chicm-ms Committed by GitHub
Browse files

Fix dispatcher CUDA_VISIBLE_DEVICES envvar for Windows (#1604)

* Fix dispatcher CUDA_VISIBLE_DEVICES for Windows
parent 1bd24577
...@@ -49,8 +49,8 @@ interface ExperimentParams { ...@@ -49,8 +49,8 @@ interface ExperimentParams {
classArgs?: any; classArgs?: any;
classFileName?: string; classFileName?: string;
checkpointDir: string; checkpointDir: string;
gpuNum?: number;
includeIntermediateResults?: boolean; includeIntermediateResults?: boolean;
gpuIndices?: string;
}; };
assessor?: { assessor?: {
className: string; className: string;
...@@ -59,7 +59,6 @@ interface ExperimentParams { ...@@ -59,7 +59,6 @@ interface ExperimentParams {
classArgs?: any; classArgs?: any;
classFileName?: string; classFileName?: string;
checkpointDir: string; checkpointDir: string;
gpuNum?: number;
}; };
advisor?: { advisor?: {
className: string; className: string;
...@@ -68,7 +67,7 @@ interface ExperimentParams { ...@@ -68,7 +67,7 @@ interface ExperimentParams {
classArgs?: any; classArgs?: any;
classFileName?: string; classFileName?: string;
checkpointDir: string; checkpointDir: string;
gpuNum?: number; gpuIndices?: string;
}; };
clusterMetaData?: { clusterMetaData?: {
key: string; key: string;
......
...@@ -219,11 +219,6 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP ...@@ -219,11 +219,6 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
if (advisor.classFileName !== undefined && advisor.classFileName.length > 1) { if (advisor.classFileName !== undefined && advisor.classFileName.length > 1) {
command += ` --advisor_class_filename ${advisor.classFileName}`; command += ` --advisor_class_filename ${advisor.classFileName}`;
} }
if (advisor.gpuIndices !== undefined) {
command = `CUDA_VISIBLE_DEVICES=${advisor.gpuIndices} ` + command;
} else {
command = `CUDA_VISIBLE_DEVICES='' ` + command;
}
} else { } else {
command += ` --tuner_class_name ${tuner.className}`; command += ` --tuner_class_name ${tuner.className}`;
if (tuner.classArgs !== undefined) { if (tuner.classArgs !== undefined) {
...@@ -248,12 +243,6 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP ...@@ -248,12 +243,6 @@ function getMsgDispatcherCommand(tuner: any, assessor: any, advisor: any, multiP
command += ` --assessor_class_filename ${assessor.classFileName}`; command += ` --assessor_class_filename ${assessor.classFileName}`;
} }
} }
if (tuner.gpuIndices !== undefined) {
command = `CUDA_VISIBLE_DEVICES=${tuner.gpuIndices} ` + command;
} else {
command = `CUDA_VISIBLE_DEVICES='' ` + command;
}
} }
return command; return command;
......
...@@ -369,7 +369,8 @@ class NNIManager implements Manager { ...@@ -369,7 +369,8 @@ class NNIManager implements Manager {
NNI_CHECKPOINT_DIRECTORY: dataDirectory, NNI_CHECKPOINT_DIRECTORY: dataDirectory,
NNI_LOG_DIRECTORY: getLogDir(), NNI_LOG_DIRECTORY: getLogDir(),
NNI_LOG_LEVEL: getLogLevel(), NNI_LOG_LEVEL: getLogLevel(),
NNI_INCLUDE_INTERMEDIATE_RESULTS: includeIntermediateResultsEnv NNI_INCLUDE_INTERMEDIATE_RESULTS: includeIntermediateResultsEnv,
CUDA_VISIBLE_DEVICES: this.getGpuEnvvarValue()
}; };
let newEnv = Object.assign({}, process.env, nniEnv); let newEnv = Object.assign({}, process.env, nniEnv);
const tunerProc: ChildProcess = getTunerProc(command,stdio,newCwd,newEnv); const tunerProc: ChildProcess = getTunerProc(command,stdio,newCwd,newEnv);
...@@ -379,6 +380,22 @@ class NNIManager implements Manager { ...@@ -379,6 +380,22 @@ class NNIManager implements Manager {
return; return;
} }
/**
 * Picks the GPU index string used as the CUDA_VISIBLE_DEVICES value
 * for the spawned tuner process.
 *
 * The advisor's `gpuIndices` is used whenever an advisor is configured;
 * the tuner's `gpuIndices` is consulted only when there is no advisor.
 *
 * @returns the configured `gpuIndices`, or an empty string when neither
 *          component is configured or the chosen one has no `gpuIndices`.
 */
private getGpuEnvvarValue(): string {
    const params = this.experimentProfile.params;
    // An advisor, when present, takes precedence over the tuner.
    const component = params.advisor !== undefined ? params.advisor : params.tuner;
    if (component !== undefined && component.gpuIndices !== undefined) {
        return component.gpuIndices;
    }

    return '';
}
private updateTrialConcurrency(trialConcurrency: number): void { private updateTrialConcurrency(trialConcurrency: number): void {
// we assume trialConcurrency >= 0, which is checked by restserver // we assume trialConcurrency >= 0, which is checked by restserver
this.trialConcurrencyChange += (trialConcurrency - this.experimentProfile.params.trialConcurrency); this.trialConcurrencyChange += (trialConcurrency - this.experimentProfile.params.trialConcurrency);
......
...@@ -72,8 +72,7 @@ describe('Unit test for dataStore', () => { ...@@ -72,8 +72,7 @@ describe('Unit test for dataStore', () => {
}`, }`,
tuner: { tuner: {
className: 'testTuner', className: 'testTuner',
checkpointDir: '/tmp/cp', checkpointDir: '/tmp/cp'
gpuNum: 0
} }
}, },
id: 'exp123', id: 'exp123',
......
...@@ -40,8 +40,7 @@ const expParams1: ExperimentParams = { ...@@ -40,8 +40,7 @@ const expParams1: ExperimentParams = {
searchSpace: 'SS', searchSpace: 'SS',
tuner: { tuner: {
className: 'testTuner', className: 'testTuner',
checkpointDir: '/tmp', checkpointDir: '/tmp'
gpuNum: 0
} }
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment