Unverified commit e5b58531, authored by SparkSnail, committed by GitHub
Browse files

Add log for copy data in pai mode (#2702)

parent 754b0043
...@@ -9,6 +9,7 @@ import * as fs from 'fs'; ...@@ -9,6 +9,7 @@ import * as fs from 'fs';
import ignore from 'ignore'; import ignore from 'ignore';
import * as path from 'path'; import * as path from 'path';
import * as tar from 'tar'; import * as tar from 'tar';
import { getLogger } from '../../common/log';
import { String } from 'typescript-string-operations'; import { String } from 'typescript-string-operations';
import { validateFileName } from '../../common/utils'; import { validateFileName } from '../../common/utils';
import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData'; import { GPU_INFO_COLLECTOR_FORMAT_WINDOWS } from './gpuData';
...@@ -110,6 +111,7 @@ export async function execCopydir(source: string, destination: string): Promise< ...@@ -110,6 +111,7 @@ export async function execCopydir(source: string, destination: string): Promise<
await fs.promises.mkdir(destPath); await fs.promises.mkdir(destPath);
} }
} else { } else {
getLogger().debug(`Copying file from ${sourcePath} to ${destPath}`);
await fs.promises.copyFile(sourcePath, destPath); await fs.promises.copyFile(sourcePath, destPath);
} }
} }
......
...@@ -74,6 +74,7 @@ class PAIK8STrainingService extends PAITrainingService { ...@@ -74,6 +74,7 @@ class PAIK8STrainingService extends PAITrainingService {
const nniManagerNFSExpCodeDir = path.join(this.paiTrialConfig.nniManagerNFSMountPath, this.experimentId, 'nni-code'); const nniManagerNFSExpCodeDir = path.join(this.paiTrialConfig.nniManagerNFSMountPath, this.experimentId, 'nni-code');
await execMkdir(nniManagerNFSExpCodeDir); await execMkdir(nniManagerNFSExpCodeDir);
//Copy codeDir files to local working folder //Copy codeDir files to local working folder
this.log.info(`Starting copy codeDir data from ${this.paiTrialConfig.codeDir} to ${nniManagerNFSExpCodeDir}`);
this.copyExpCodeDirPromise = execCopydir(this.paiTrialConfig.codeDir, nniManagerNFSExpCodeDir); this.copyExpCodeDirPromise = execCopydir(this.paiTrialConfig.codeDir, nniManagerNFSExpCodeDir);
if (this.paiTrialConfig.paiConfigPath) { if (this.paiTrialConfig.paiConfigPath) {
this.paiJobConfig = yaml.safeLoad(fs.readFileSync(this.paiTrialConfig.paiConfigPath, 'utf8')); this.paiJobConfig = yaml.safeLoad(fs.readFileSync(this.paiTrialConfig.paiConfigPath, 'utf8'));
...@@ -259,6 +260,10 @@ class PAIK8STrainingService extends PAITrainingService { ...@@ -259,6 +260,10 @@ class PAIK8STrainingService extends PAITrainingService {
// Make sure experiment code files is copied from local to NFS // Make sure experiment code files is copied from local to NFS
if (this.copyExpCodeDirPromise !== undefined) { if (this.copyExpCodeDirPromise !== undefined) {
await this.copyExpCodeDirPromise; await this.copyExpCodeDirPromise;
this.log.info(`Copy codeDir data finished.`);
// All trials share the same destination NFS code folder, so codeDir is copied only once per experiment.
// Once the copy has finished, reset copyExpCodeDirPromise to undefined so the completion log is not emitted again.
this.copyExpCodeDirPromise = undefined;
} }
this.paiRestServerPort = this.paiJobRestServer.clusterRestServerPort; this.paiRestServerPort = this.paiJobRestServer.clusterRestServerPort;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment