Unverified Commit feb6f3b8 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Support paiTrainingService on windows (#1075)

parent e9eaa00d
......@@ -357,13 +357,18 @@ function countFilesRecursively(directory: string, timeoutMilliSeconds?: number):
});
let fileCount: number = -1;
cpp.exec(`find ${directory} -type f | wc -l`).then((result) => {
let cmd: string;
if(process.platform === "win32") {
cmd = `powershell "Get-ChildItem -Path ${directory} -Recurse -File | Measure-Object | %{$_.Count}"`
} else {
cmd = `find ${directory} -type f | wc -l`;
}
cpp.exec(cmd).then((result) => {
if(result.stdout && parseInt(result.stdout)) {
fileCount = parseInt(result.stdout);
}
deferred.resolve(fileCount);
});
return Promise.race([deferred.promise, delayTimeout]).finally(() => {
clearTimeout(timeoutId);
});
......@@ -459,6 +464,16 @@ function getNewLine(): string{
}
}
/**
 * Joins path segments with '/' on every platform, never with '\'.
 * Segments that are exactly the empty string are discarded before joining;
 * if the joined result is empty, '.' is returned instead.
 * @param paths segments to join, in order
 * @returns the '/'-separated path, or '.' when there is nothing to join
 */
function unixPathJoin(...paths: any[]): string {
    const kept: any[] = paths.filter((segment: any) => segment !== '');
    const joined: string = kept.join('/');

    return joined === '' ? '.' : joined;
}
// Public API of this utility module; every name is listed exactly once.
// unixPathJoin is exported for callers that must build '/'-separated paths.
export {countFilesRecursively, getRemoteTmpDir, generateParamFileName, getMsgDispatcherCommand, getCheckpointDir,
    getLogDir, getExperimentRootDir, getJobCancelStatus, getDefaultDatabaseDir, getIPV4Address, unixPathJoin,
    mkDirP, delay, prepareUnitTest, parseArg, cleanupUnitTest, uniqueString, randomSelect, getLogLevel, getVersion, getCmdPy, getTunerProc, isAlive, killPid, getNewLine };
......@@ -152,7 +152,16 @@ mkDirP(getLogDir())
console.error(`Failed to create log dir: ${err.stack}`);
});
process.on('SIGTERM', async () => {
/**
 * Picks the name of the process signal used to request shutdown.
 * @returns 'SIGBREAK' when process.platform is "win32", 'SIGTERM' otherwise
 */
function getStopSignal(): any {
    return process.platform === "win32" ? 'SIGBREAK' : 'SIGTERM';
}
process.on(getStopSignal(), async () => {
const log: Logger = getLogger();
let hasError: boolean = false;
try {
......
......@@ -22,6 +22,7 @@ import * as fs from 'fs';
import { Deferred } from 'ts-deferred';
import { getExperimentId } from '../../common/experimentStartupInfo';
import { getLogger } from '../../common/log';
import { unixPathJoin } from '../../common/utils'
/**
* HDFS client utility, including copy file/directory
......@@ -32,7 +33,7 @@ export namespace HDFSClientUtility {
* @param hdfsUserName HDFS user name
*/
function hdfsExpRootDir(hdfsUserName: string): string {
    // Builds '/<hdfsUserName>/nni/experiments/<experimentId>'.
    // The segments are joined with unixPathJoin rather than path.join because
    // path.join uses the local OS separator ('\' on Windows), while this path
    // is expected to be '/'-separated regardless of where the manager runs.
    return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());
}
/**
......@@ -40,7 +41,7 @@ export namespace HDFSClientUtility {
* @param hdfsUserName HDFS user name
*/
export function getHdfsExpCodeDir(hdfsUserName: string): string {
    // Returns '<experiment root>/codeDir', where the root comes from hdfsExpRootDir.
    // Joined with '/' explicitly (unixPathJoin) so the result does not depend on
    // the local OS path separator, as it would with path.join on Windows.
    return unixPathJoin(hdfsExpRootDir(hdfsUserName), 'codeDir');
}
/**
......@@ -49,7 +50,9 @@ export namespace HDFSClientUtility {
* @param trialId NNI trial ID
*/
export function getHdfsTrialWorkDir(hdfsUserName: string, trialId: string): string {
    // Returns '<experiment root>/trials/<trialId>', where the root comes from
    // hdfsExpRootDir. Joined with '/' explicitly (unixPathJoin) so the result does
    // not depend on the local OS path separator, as it would with path.join on Windows.
    const root: string = hdfsExpRootDir(hdfsUserName);

    return unixPathJoin(root, 'trials', trialId);
}
/**
......
......@@ -40,7 +40,8 @@ import { delay, generateParamFileName,
getExperimentRootDir, getIPV4Address, getVersion, uniqueString } from '../../common/utils';
import { CONTAINER_INSTALL_NNI_SHELL_FORMAT } from '../common/containerJobData';
import { TrialConfigMetadataKey } from '../common/trialConfigMetadataKey';
import { validateCodeDir } from '../common/util';
import { validateCodeDir, execMkdir } from '../common/util';
import { unixPathJoin } from '../../common/utils'
import { HDFSClientUtility } from './hdfsClientUtility';
import { NNIPAITrialConfig, PAIClusterConfig, PAIJobConfig, PAITaskRole } from './paiConfig';
import { PAI_LOG_PATH_FORMAT, PAI_OUTPUT_DIR_FORMAT, PAI_TRIAL_COMMAND_FORMAT, PAITrialJobDetail } from './paiData';
......@@ -406,12 +407,12 @@ class PAITrainingService implements TrainingService {
}
// Step 1. Prepare PAI job configuration
const hdfsOutputDir : string = path.join(this.hdfsBaseDir, this.experimentId, trialJobId);
const hdfsOutputDir : string = unixPathJoin(this.hdfsBaseDir, this.experimentId, trialJobId);
const hdfsCodeDir: string = HDFSClientUtility.getHdfsTrialWorkDir(this.paiClusterConfig.userName, trialJobId);
const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
//create tmp trial working folder locally.
await cpp.exec(`mkdir -p ${trialLocalTempFolder}`);
await execMkdir(trialLocalTempFolder);
const runScriptContent : string = CONTAINER_INSTALL_NNI_SHELL_FORMAT;
// Write NNI installation file to local tmp files
......
......@@ -86,7 +86,7 @@ def convert_command():
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote'], default='pai')
parser.add_argument("--ts", type=str, choices=['pai', 'kubeflow', 'remote', 'local'], default='pai')
parser.add_argument("--nni_docker_image", type=str)
parser.add_argument("--nni_manager_ip", type=str)
# args for PAI
......@@ -111,4 +111,5 @@ if __name__ == '__main__':
args = parser.parse_args()
update_training_service_config(args)
convert_command()
if args.ts == 'local':
convert_command()
......@@ -14,7 +14,7 @@ jobs:
displayName: 'Install dependencies for integration tests'
- script: |
cd test
python generate_ts_config.py
python generate_ts_config.py --ts local
displayName: 'generate config files'
- script: |
cd test
......
jobs:
# Job: builds (or selects) the Docker image used by the PAI integration test
# and exposes its name as the output variable `docker_image`.
- job: 'build_docker_image'
timeoutInMinutes: 0
pool:
vmImage: 'Ubuntu 16.04'
steps:
# Upgrade pip and setuptools for the current user.
- script: python3 -m pip install --upgrade pip setuptools --user
displayName: 'Install python tools'
# Build the NNI package with `make build` and list the resulting dist/ folder.
# Runs only when the `build_docker_img` variable equals 'true'.
# NOTE(review): 'bdsit_wheel' in the displayName below looks like a typo for 'bdist_wheel' - confirm.
- script: |
cd deployment/pypi
echo 'building prerelease package...'
make build
ls $(Build.SourcesDirectory)/deployment/pypi/dist/
condition: eq( variables['build_docker_img'], 'true' )
displayName: 'build nni bdsit_wheel'
# When build_docker_img is 'true': log in to the registry, rewrite the Dockerfile
# so NNI is installed from the locally built wheel, then build and push an image
# tagged with the current UTC time (yymmddHHMM). Otherwise reuse $(existing_docker_img).
# In both cases the chosen image name is published as the TEST_IMG variable.
- script: |
if [ $(build_docker_img) = 'true' ]
then
cd deployment/pypi
docker login -u $(docker_hub_user) -p $(docker_hub_pwd)
echo 'updating docker file for installing nni from local...'
# update Dockerfile to install NNI in docker image from whl file built in last step
sed -ie 's/RUN python3 -m pip --no-cache-dir install nni/COPY .\/dist\/* .\nRUN python3 -m pip install nni-*.whl/' ../docker/Dockerfile
cat ../docker/Dockerfile
export IMG_TAG=`date -u +%y%m%d%H%M`
echo 'build and upload docker image'
docker build -f ../docker/Dockerfile -t $(test_docker_img_name):$IMG_TAG .
docker push $(test_docker_img_name):$IMG_TAG
export TEST_IMG=$(test_docker_img_name):$IMG_TAG
cd ../../
else
export TEST_IMG=$(existing_docker_img)
fi
echo "##vso[task.setvariable variable=TEST_IMG]$TEST_IMG"
displayName: 'build docker image'
# Re-publish TEST_IMG as the output variable `docker_image` (isOutput=true) under
# the step name `setvariableStep`, so a dependent job can read it.
# NOTE(review): unlike the other multi-line steps, this `script:` has no `|`
# block-scalar indicator - confirm both echo lines are executed as separate commands.
- script:
echo $TEST_IMG
echo "##vso[task.setvariable variable=docker_image;isOutput=true]$TEST_IMG"
name: setvariableStep
displayName: 'set image variable'
# Job: runs the NNI integration tests against PAI once build_docker_image has finished.
# NOTE(review): the scripts use `set PATH=...` and powershell.exe, so this presumably
# targets a Windows agent, but no `pool` is declared here - confirm where it is configured.
- job: 'integration_test_pai'
timeoutInMinutes: 0
dependsOn: build_docker_image
variables:
# Image name produced by the `setvariableStep` step of the build_docker_image job.
docker_image: $[ dependencies.build_docker_image.outputs['setvariableStep.docker_image'] ]
steps:
# Set PATH from $(ENV_PATH), print the Python version, then install NNI via install.ps1.
- script: |
set PATH=$(ENV_PATH)
python --version
powershell.exe -file install.ps1
displayName: 'Install nni toolkit via source code'
# Generate the PAI training-service config from pipeline variables, then run
# config_test.py for PAI with multi_phase, smac and bohb excluded.
- script: |
cd test
set PATH=$(ENV_PATH)
python --version
python generate_ts_config.py --ts pai --pai_host $(pai_host) --pai_user $(pai_user) --pai_pwd $(pai_pwd) --vc $(pai_virtual_cluster) --nni_docker_image $(docker_image) --data_dir $(data_dir) --output_dir $(output_dir) --nni_manager_ip $(nni_manager_ip)
python config_test.py --ts pai --exclude multi_phase,smac,bohb
displayName: 'Examples and advanced features tests on pai'
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment