Unverified Commit 154bcc55 authored by fishyds's avatar fishyds Committed by GitHub
Browse files

[PAI training service] Support virtualCluster configuration (#401)

* [PAI training service] Support virtual cluster config

* fix a small bug to convert virtualCluster to string
parent 21a2bb0b
......@@ -41,6 +41,7 @@ export namespace ValidationSchemas {
memoryMB: joi.number().min(100),
gpuNum: joi.number().min(0),
command: joi.string().min(1),
virtualCluster: joi.string(),
worker: joi.object({
replicas: joi.number().min(1).required(),
image: joi.string().min(1),
......
......@@ -69,6 +69,9 @@ export class PAIJobConfig{
// List of taskRole, one task role at least
public taskRoles: PAITaskRole[];
// The virtual cluster that the job runs on.
public readonly virtualCluster: string;
/**
* Constructor
* @param jobName Name for the job, need to be unique
......@@ -77,13 +80,15 @@ export class PAIJobConfig{
* @param outputDir Output directory on HDFS
* @param taskRoles List of taskRole, one task role at least
*/
constructor(jobName: string, image : string, dataDir : string, outputDir : string, codeDir : string, taskRoles : PAITaskRole[]){
constructor(jobName: string, image : string, dataDir : string, outputDir : string, codeDir : string,
taskRoles : PAITaskRole[], virtualCluster: string) {
// Plain value holder: each argument is stored unchanged in the field of the same name.
this.jobName = jobName;
this.image = image;
this.dataDir = dataDir;
this.outputDir = outputDir;
this.codeDir = codeDir;
this.taskRoles = taskRoles;
// virtualCluster is a required string and no default is applied here,
// so the caller has to pass a concrete virtual cluster name.
this.virtualCluster = virtualCluster;
}
}
......@@ -112,13 +117,18 @@ export class NNIPAITrialConfig extends TrialConfig{
public readonly dataDir: string;
public outputDir: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number, image: string, dataDir: string, outputDir: string) {
// The virtual cluster that the job runs on. If omitted, the job will run on the default virtual cluster.
public virtualCluster?: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number,
image: string, dataDir: string, outputDir: string, virtualCluster?: string) {
// command, codeDir and gpuNum are forwarded to the TrialConfig base constructor;
// the remaining arguments are stored unchanged in the fields of the same name.
super(command, codeDir, gpuNum);
this.cpuNum = cpuNum;
this.memoryMB = memoryMB;
this.image = image;
this.dataDir = dataDir;
this.outputDir = outputDir;
// Optional: stays undefined when the caller omits it; no default is substituted in this constructor.
this.virtualCluster = virtualCluster;
}
}
......@@ -236,9 +236,10 @@ class PAITrainingService implements TrainingService {
this.paiTrialConfig.outputDir,
// codeDir
`$PAI_DEFAULT_FS_URI${hdfsCodeDir}`,
// TODO: Add Virtual Cluster
// PAI Task roles
paiTaskRoles);
paiTaskRoles,
// Virtual cluster; falls back to 'default' when not configured
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString());
// Step 2. Upload code files in codeDir onto HDFS
try {
......@@ -393,7 +394,7 @@ class PAITrainingService implements TrainingService {
this.paiClusterConfig.host
).replace(/\r\n|\n|\r/gm, '');
}
const hdfsDirContent = this.paiTrialConfig.outputDir.match(this.hdfsDirPattern);
if(hdfsDirContent === null) {
......
......@@ -83,7 +83,8 @@ pai_trial_schema = {
'memoryMB': int,
'image': str,
Optional('dataDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
Optional('outputDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?')
Optional('outputDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
Optional('virtualCluster'): str
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment