Unverified Commit 154bcc55 authored by fishyds's avatar fishyds Committed by GitHub
Browse files

[PAI training service] Support virtualCluster configuration (#401)

* [PAI training service] Support virtual cluster config

* fix a small bug to convert virtualCluster to string
parent 21a2bb0b
...@@ -41,6 +41,7 @@ export namespace ValidationSchemas { ...@@ -41,6 +41,7 @@ export namespace ValidationSchemas {
memoryMB: joi.number().min(100), memoryMB: joi.number().min(100),
gpuNum: joi.number().min(0), gpuNum: joi.number().min(0),
command: joi.string().min(1), command: joi.string().min(1),
virtualCluster: joi.string(),
worker: joi.object({ worker: joi.object({
replicas: joi.number().min(1).required(), replicas: joi.number().min(1).required(),
image: joi.string().min(1), image: joi.string().min(1),
......
...@@ -69,6 +69,9 @@ export class PAIJobConfig{ ...@@ -69,6 +69,9 @@ export class PAIJobConfig{
// List of taskRole, one task role at least // List of taskRole, one task role at least
public taskRoles: PAITaskRole[]; public taskRoles: PAITaskRole[];
// The virtual cluster that the job runs on.
public readonly virtualCluster: string;
/** /**
* Constructor * Constructor
* @param jobName Name for the job, need to be unique * @param jobName Name for the job, need to be unique
...@@ -77,13 +80,15 @@ export class PAIJobConfig{ ...@@ -77,13 +80,15 @@ export class PAIJobConfig{
* @param outputDir Output directory on HDFS * @param outputDir Output directory on HDFS
* @param taskRoles List of taskRole, one task role at least * @param taskRoles List of taskRole, one task role at least
*/ */
constructor(jobName: string, image : string, dataDir : string, outputDir : string, codeDir : string, taskRoles : PAITaskRole[]){ constructor(jobName: string, image : string, dataDir : string, outputDir : string, codeDir : string,
taskRoles : PAITaskRole[], virtualCluster: string) {
this.jobName = jobName; this.jobName = jobName;
this.image = image; this.image = image;
this.dataDir = dataDir; this.dataDir = dataDir;
this.outputDir = outputDir; this.outputDir = outputDir;
this.codeDir = codeDir; this.codeDir = codeDir;
this.taskRoles = taskRoles; this.taskRoles = taskRoles;
this.virtualCluster = virtualCluster;
} }
} }
...@@ -112,13 +117,18 @@ export class NNIPAITrialConfig extends TrialConfig{ ...@@ -112,13 +117,18 @@ export class NNIPAITrialConfig extends TrialConfig{
public readonly dataDir: string; public readonly dataDir: string;
public outputDir: string; public outputDir: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number, image: string, dataDir: string, outputDir: string) { //The virtual cluster job runs on. If omitted, the job will run on default virtual cluster
public virtualCluster?: string;
constructor(command : string, codeDir : string, gpuNum : number, cpuNum: number, memoryMB: number,
image: string, dataDir: string, outputDir: string, virtualCluster?: string) {
super(command, codeDir, gpuNum); super(command, codeDir, gpuNum);
this.cpuNum = cpuNum; this.cpuNum = cpuNum;
this.memoryMB = memoryMB; this.memoryMB = memoryMB;
this.image = image; this.image = image;
this.dataDir = dataDir; this.dataDir = dataDir;
this.outputDir = outputDir; this.outputDir = outputDir;
this.virtualCluster = virtualCluster;
} }
} }
...@@ -236,9 +236,10 @@ class PAITrainingService implements TrainingService { ...@@ -236,9 +236,10 @@ class PAITrainingService implements TrainingService {
this.paiTrialConfig.outputDir, this.paiTrialConfig.outputDir,
// codeDir // codeDir
`$PAI_DEFAULT_FS_URI${hdfsCodeDir}`, `$PAI_DEFAULT_FS_URI${hdfsCodeDir}`,
// TODO: Add Virtual Cluster
// PAI Task roles // PAI Task roles
paiTaskRoles); paiTaskRoles,
// Virtual cluster; falls back to 'default' when none is configured
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString());
// Step 2. Upload code files in codeDir onto HDFS // Step 2. Upload code files in codeDir onto HDFS
try { try {
......
...@@ -83,7 +83,8 @@ pai_trial_schema = { ...@@ -83,7 +83,8 @@ pai_trial_schema = {
'memoryMB': int, 'memoryMB': int,
'image': str, 'image': str,
Optional('dataDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'), Optional('dataDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
Optional('outputDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?') Optional('outputDir'): Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),
Optional('virtualCluster'): str
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment