Unverified Commit 139e0a90 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Fix remote gpu scheduler bug (#1143)

parent ce274f0a
...@@ -29,31 +29,18 @@ import { GPUSummary, GPUInfo } from '../common/gpuData'; ...@@ -29,31 +29,18 @@ import { GPUSummary, GPUInfo } from '../common/gpuData';
* Metadata of remote machine for configuration and status query * Metadata of remote machine for configuration and status query
*/ */
export class RemoteMachineMeta { export class RemoteMachineMeta {
public readonly ip : string; public readonly ip : string = '';
public readonly port : number; public readonly port : number = 22;
public readonly username : string; public readonly username : string = '';
public readonly passwd?: string; public readonly passwd: string = '';
public readonly sshKeyPath?: string; public readonly sshKeyPath?: string;
public readonly passphrase?: string; public readonly passphrase?: string;
public gpuSummary : GPUSummary | undefined; public gpuSummary : GPUSummary | undefined;
public readonly gpuIndices?: string; public readonly gpuIndices?: string;
public readonly maxTrialNumPerGpu?: number; public readonly maxTrialNumPerGpu?: number;
public occupiedGpuIndexMap: Map<number, number>; //TODO: initialize variable in constructor
public occupiedGpuIndexMap?: Map<number, number>;
public readonly useActiveGpu?: boolean = false; public readonly useActiveGpu?: boolean = false;
constructor(ip : string, port : number, username : string, passwd : string,
sshKeyPath: string, passphrase : string, gpuIndices?: string, maxTrialNumPerGpu?: number, useActiveGpu?: boolean) {
this.ip = ip;
this.port = port;
this.username = username;
this.passwd = passwd;
this.sshKeyPath = sshKeyPath;
this.passphrase = passphrase;
this.gpuIndices = gpuIndices;
this.maxTrialNumPerGpu = maxTrialNumPerGpu;
this.occupiedGpuIndexMap = new Map<number, number>();
this.useActiveGpu = useActiveGpu;
}
} }
export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined { export function parseGpuIndices(gpuIndices?: string): Set<number> | undefined {
......
...@@ -466,6 +466,7 @@ class RemoteMachineTrainingService implements TrainingService { ...@@ -466,6 +466,7 @@ class RemoteMachineTrainingService implements TrainingService {
let connectedRMNum: number = 0; let connectedRMNum: number = 0;
rmMetaList.forEach(async (rmMeta: RemoteMachineMeta) => { rmMetaList.forEach(async (rmMeta: RemoteMachineMeta) => {
rmMeta.occupiedGpuIndexMap = new Map<number, number>();
let sshClientManager: SSHClientManager = new SSHClientManager([], this.MAX_TRIAL_NUMBER_PER_SSHCONNECTION, rmMeta); let sshClientManager: SSHClientManager = new SSHClientManager([], this.MAX_TRIAL_NUMBER_PER_SSHCONNECTION, rmMeta);
let sshClient: Client = await sshClientManager.getAvailableSSHClient(); let sshClient: Client = await sshClientManager.getAvailableSSHClient();
this.machineSSHClientMap.set(rmMeta, sshClientManager); this.machineSSHClientMap.set(rmMeta, sshClientManager);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment