Unverified Commit 76f39903 authored by J-shang's avatar J-shang Committed by GitHub
Browse files

sharedstorage support remote umount and fix bug (#3456)

parent 200a1086
......@@ -124,6 +124,12 @@ class LinuxCommands extends OsCommands {
command = `bash '${script}'`;
} else {
script = script.replace(/"/g, '\\"');
const result = script.match(/[^\\]\\\\"/g);
if (result) {
result.forEach((res) => {
script = script.replace(res, res.replace(/"$/g, '\\"'));
})
}
command = `bash -c "${script}"`;
}
return command;
......
......@@ -181,7 +181,7 @@ export class RemoteEnvironmentService extends EnvironmentService {
} else {
environment.setStatus('FAILED');
}
this.releaseEnvironmentResource(environment);
await this.releaseEnvironmentResource(environment);
}
}
}
......@@ -194,7 +194,16 @@ export class RemoteEnvironmentService extends EnvironmentService {
* If a environment is finished, release the connection resource
* @param environment remote machine environment job detail
*/
private releaseEnvironmentResource(environment: EnvironmentInformation): void {
private async releaseEnvironmentResource(environment: EnvironmentInformation): Promise<void> {
if (environment.useSharedStorage) {
const executor = await this.getExecutor(environment.id);
const remoteUmountCommand = component.get<SharedStorageService>(SharedStorageService).remoteUmountCommand;
const result = await executor.executeScript(remoteUmountCommand, false, false);
if (result.exitCode !== 0) {
this.log.error(`Umount shared storage on remote machine failed.\n ERROR: ${result.stderr}`);
}
}
const executorManager = this.environmentExecutorManagerMap.get(environment.id);
if (executorManager === undefined) {
throw new Error(`ExecutorManager is not assigned for environment ${environment.id}`);
......@@ -251,8 +260,11 @@ export class RemoteEnvironmentService extends EnvironmentService {
const executor = await this.getExecutor(environment.id);
if (environment.useSharedStorage) {
this.remoteExperimentRootDir = component.get<SharedStorageService>(SharedStorageService).remoteWorkingRoot;
const remoteMountCommand = component.get<SharedStorageService>(SharedStorageService).remoteMountCommand;
await executor.executeScript(remoteMountCommand, false, false);
const remoteMountCommand = component.get<SharedStorageService>(SharedStorageService).remoteMountCommand.replace(/echo -e /g, `echo `).replace(/echo /g, `echo -e `);
const result = await executor.executeScript(remoteMountCommand, false, false);
if (result.exitCode !== 0) {
throw new Error(`Mount shared storage on remote machine failed.\n ERROR: ${result.stderr}`);
}
} else {
this.remoteExperimentRootDir = executor.getRemoteExperimentRootDir(getExperimentId());
}
......@@ -304,14 +316,14 @@ export class RemoteEnvironmentService extends EnvironmentService {
if (environment.status === 'UNKNOWN') {
environment.status = 'USER_CANCELED';
this.releaseEnvironmentResource(environment);
await this.releaseEnvironmentResource(environment);
return
}
const jobpidPath: string = `${environment.runnerWorkingFolder}/pid`;
try {
await executor.killChildProcesses(jobpidPath);
this.releaseEnvironmentResource(environment);
await this.releaseEnvironmentResource(environment);
} catch (error) {
this.log.error(`stopEnvironment: ${error}`);
}
......
......@@ -20,6 +20,8 @@ export abstract class SharedStorageService {
public abstract get storageService(): StorageService;
public abstract get localMountCommand(): string;
public abstract get remoteMountCommand(): string;
public abstract get remoteUmountCommand(): string;
public abstract get localWorkingRoot(): string;
public abstract get remoteWorkingRoot(): string;
public abstract cleanUp(): Promise<void>;
}
......@@ -79,6 +79,7 @@ export class AzureBlobSharedStorageService extends SharedStorageService {
private log: Logger;
private internalStorageService: MountedStorageService;
private experimentId: string;
private localMounted?: string;
private storageType?: SharedStorageType;
private storageAccountName?: string;
......@@ -113,10 +114,10 @@ export class AzureBlobSharedStorageService extends SharedStorageService {
this.log.error(errorMessage);
return Promise.reject(errorMessage);
}
if (azureblobConfig.localMounted === 'nnimount') {
this.localMounted = azureblobConfig.localMounted;
if (this.localMounted === 'nnimount') {
await this.helpLocalMount();
} else if (azureblobConfig.localMounted === 'nomount') {
} else if (this.localMounted === 'nomount') {
const errorMessage = `${this.storageType} Shared Storage: ${this.storageType} not Support 'nomount' yet.`;
this.log.error(errorMessage);
return Promise.reject(errorMessage);
......@@ -154,6 +155,15 @@ export class AzureBlobSharedStorageService extends SharedStorageService {
}
}
public get remoteUmountCommand(): string {
if (this.remoteMountPoint) {
return `sudo umount -l ${this.remoteMountPoint}`;
} else {
this.log.error(`${this.storageType} Shared Storage: remoteMountPoint is not initialized.`);
return '';
}
}
private getCommand(mountPoint: string): string {
const install = `rm -f nni_install_fuseblob.sh && touch nni_install_fuseblob.sh && echo "${INSTALL_BLOBFUSE.replace(/\$/g, `\\$`).replace(/\n/g, `\\n`).replace(/"/g, `\\"`)}" >> nni_install_fuseblob.sh && bash nni_install_fuseblob.sh`;
const prepare = `sudo mkdir /mnt/resource/nniblobfusetmp -p && rm -f nni_fuse_connection.cfg && touch nni_fuse_connection.cfg && echo "accountName ${this.storageAccountName}\\naccountKey ${this.storageAccountKey}\\ncontainerName ${this.containerName}" >> nni_fuse_connection.cfg`;
......@@ -206,4 +216,21 @@ export class AzureBlobSharedStorageService extends SharedStorageService {
return Promise.reject(errorMessage);
}
}
public async cleanUp(): Promise<void> {
if (this.localMounted !== 'nnimount') {
return Promise.resolve();
}
try {
const result = await cpp.exec(`sudo umount -l ${this.localMountPoint}`);
if (result.stderr) {
throw new Error(result.stderr);
}
} catch (error) {
const errorMessage: string = `${this.storageType} Shared Storage: Umount ${this.localMountPoint} failed, error is ${error}`;
this.log.error(errorMessage);
return Promise.reject(errorMessage);
}
return Promise.resolve();
}
}
......@@ -59,6 +59,7 @@ export class NFSSharedStorageService extends SharedStorageService {
private log: Logger;
private internalStorageService: MountedStorageService;
private experimentId: string;
private localMounted?: string;
private storageType?: SharedStorageType;
private nfsServer?: string;
......@@ -83,9 +84,10 @@ export class NFSSharedStorageService extends SharedStorageService {
this.storageType = nfsConfig.storageType;
this.nfsServer = nfsConfig.nfsServer;
this.exportedDirectory = nfsConfig.exportedDirectory;
if (nfsConfig.localMounted === 'nnimount') {
this.localMounted = nfsConfig.localMounted;
if (this.localMounted === 'nnimount') {
await this.helpLocalMount();
} else if (nfsConfig.localMounted === 'nomount') {
} else if (this.localMounted === 'nomount') {
const errorMessage = `${this.storageType} Shared Storage: ${this.storageType} not Support 'nomount'.`;
this.log.error(errorMessage);
return Promise.reject(errorMessage);
......@@ -122,6 +124,15 @@ export class NFSSharedStorageService extends SharedStorageService {
}
}
public get remoteUmountCommand(): string {
if (this.remoteMountPoint) {
return `sudo umount -f -l ${this.remoteMountPoint}`;
} else {
this.log.error(`${this.storageType} Shared Storage: remoteMountPoint is not initialized.`);
return '';
}
}
private getCommand(mountPoint: string): string {
const install = `rm -f nni_install_nfsclient.sh && touch nni_install_nfsclient.sh && echo "${INSTALL_NFS_CLIENT.replace(/\$/g, `\\$`).replace(/\n/g, `\\n`).replace(/"/g, `\\"`)}" >> nni_install_nfsclient.sh && bash nni_install_nfsclient.sh`;
const mount = `mkdir -p ${mountPoint} && sudo mount ${this.nfsServer}:${this.exportedDirectory} ${mountPoint}`;
......@@ -157,4 +168,21 @@ export class NFSSharedStorageService extends SharedStorageService {
return Promise.resolve();
}
public async cleanUp(): Promise<void> {
if (this.localMounted !== 'nnimount') {
return Promise.resolve();
}
try {
const result = await cpp.exec(`sudo umount -f -l ${this.localMountPoint}`);
if (result.stderr) {
throw new Error(result.stderr);
}
} catch (error) {
const errorMessage: string = `${this.storageType} Shared Storage: Umount ${this.localMountPoint} failed, error is ${error}`;
this.log.error(errorMessage);
return Promise.reject(errorMessage);
}
return Promise.resolve();
}
}
......@@ -348,6 +348,11 @@ class TrialDispatcher implements TrainingService {
for (const commandChannel of this.commandChannelSet) {
await commandChannel.stop();
}
if (this.useSharedStorage) {
this.log.info(`stopping shared storage...`)
await component.get<SharedStorageService>(SharedStorageService).cleanUp();
this.log.info(`shared storage stopped.`)
}
}
private async environmentMaintenanceLoop(): Promise<void> {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment