Unverified commit aa316742, authored by SparkSnail and committed by GitHub
Browse files

Merge pull request #233 from microsoft/master

merge master
parents 3fe117f0 24fa4619
jobs: jobs:
- job: 'integration_test_remote_windows' - job: 'integration_test_remote_windows'
timeoutInMinutes: 0
steps: steps:
- script: python -m pip install --upgrade pip setuptools - script: python -m pip install --upgrade pip setuptools
...@@ -32,7 +33,10 @@ jobs: ...@@ -32,7 +33,10 @@ jobs:
- powershell: | - powershell: |
Write-Host "Downloading Putty..." Write-Host "Downloading Putty..."
(New-Object Net.WebClient).DownloadFile("https://the.earth.li/~sgtatham/putty/latest/w64/pscp.exe", "$(Agent.TempDirectory)\pscp.exe") (New-Object Net.WebClient).DownloadFile("https://the.earth.li/~sgtatham/putty/latest/w64/pscp.exe", "$(Agent.TempDirectory)\pscp.exe")
$(Agent.TempDirectory)\pscp.exe -hostkey $(hostkey) -pw $(pscp_pwd) $(remote_user)@$(remote_host):/tmp/nnitest/$(Build.BuildId)/port test\port Write-Host "Download Putty success!"
Write-Host "Connecting to host..."
$(Agent.TempDirectory)\pscp.exe -P $(remote_port) -hostkey $(hostkey) -pw $(pscp_pwd) $(remote_user)@$(remote_host):/tmp/nnitest/$(Build.BuildId)/port test\port
Write-Host "Get port success!"
Get-Content test\port Get-Content test\port
displayName: 'Get docker port' displayName: 'Get docker port'
- powershell: | - powershell: |
......
jobs: jobs:
- job: 'integration_test_remote' - job: 'integration_test_remote'
timeoutInMinutes: 0
steps: steps:
- script: python3 -m pip install --upgrade pip setuptools --user - script: python3 -m pip install --upgrade pip setuptools --user
...@@ -20,31 +21,31 @@ jobs: ...@@ -20,31 +21,31 @@ jobs:
displayName: 'Install dependencies for integration tests in remote mode' displayName: 'Install dependencies for integration tests in remote mode'
- task: CopyFilesOverSSH@0 - task: CopyFilesOverSSH@0
inputs: inputs:
sshEndpoint: remote_nni-ci-gpu-01 sshEndpoint: $(end_point)
sourceFolder: deployment/pypi/dist/ sourceFolder: deployment/pypi/dist/
targetFolder: /tmp/nnitest/$(Build.BuildId)/dist targetFolder: /tmp/nnitest/$(Build.BuildId)/dist
overwrite: true overwrite: true
displayName: 'Copy dist files to remote machine' displayName: 'Copy dist files to remote machine'
- task: CopyFilesOverSSH@0 - task: CopyFilesOverSSH@0
inputs: inputs:
sshEndpoint: remote_nni-ci-gpu-01 sshEndpoint: $(end_point)
sourceFolder: test sourceFolder: test
targetFolder: /tmp/nnitest/$(Build.BuildId)/test targetFolder: /tmp/nnitest/$(Build.BuildId)/test
overwrite: true overwrite: true
displayName: 'Copy test files to remote machine' displayName: 'Copy test files to remote machine'
- task: SSH@0 - task: SSH@0
inputs: inputs:
sshEndpoint: remote_nni-ci-gpu-01 sshEndpoint: $(end_point)
runOptions: commands runOptions: commands
commands: python3 /tmp/nnitest/$(Build.BuildId)/test/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni commands: python3 /tmp/nnitest/$(Build.BuildId)/test/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni
displayName: 'Start docker' displayName: 'Start docker'
- task: DownloadSecureFile@1 - task: DownloadSecureFile@1
inputs: inputs:
secureFile: remote_ci_private_key secureFile: $(remote_private_key)
- script: | - script: |
cp $(Agent.TempDirectory)/remote_ci_private_key test/id_rsa cp $(Agent.TempDirectory)/$(remote_private_key) test/id_rsa
chmod 600 test/id_rsa chmod 600 test/id_rsa
scp -i test/id_rsa $(remote_user)@$(remote_host):/tmp/nnitest/$(Build.BuildId)/port test/port scp -P $(remote_port) -i test/id_rsa $(remote_user)@$(remote_host):/tmp/nnitest/$(Build.BuildId)/port test/port
cat test/port cat test/port
displayName: 'Get docker port' displayName: 'Get docker port'
- script: | - script: |
...@@ -58,7 +59,7 @@ jobs: ...@@ -58,7 +59,7 @@ jobs:
displayName: 'integration test' displayName: 'integration test'
- task: SSH@0 - task: SSH@0
inputs: inputs:
sshEndpoint: remote_nni-ci-gpu-01 sshEndpoint: $(end_point)
runOptions: commands runOptions: commands
commands: python3 /tmp/nnitest/$(Build.BuildId)/test/remote_docker.py --mode stop --name $(Build.BuildId) commands: python3 /tmp/nnitest/$(Build.BuildId)/test/remote_docker.py --mode stop --name $(Build.BuildId)
displayName: 'Stop docker' displayName: 'Stop docker'
...@@ -24,6 +24,32 @@ kubeflow: ...@@ -24,6 +24,32 @@ kubeflow:
image: image:
trainingServicePlatform: kubeflow trainingServicePlatform: kubeflow
frameworkcontroller:
maxExecDuration: 15m
nniManagerIp:
frameworkcontrollerConfig:
serviceAccountName: frameworkbarrier
storage: azureStorage
keyVault:
vaultName:
name:
azureStorage:
accountName:
azureShare:
trial:
taskRoles:
- name: worker
taskNum: 1
command:
gpuNum: 1
cpuNum: 1
memoryMB: 8192
image:
frameworkAttemptCompletionPolicy:
minFailedTaskCount: 1
minSucceededTaskCount: 1
trainingServicePlatform: frameworkcontroller
local: local:
trainingServicePlatform: local trainingServicePlatform: local
pai: pai:
......
...@@ -78,17 +78,17 @@ def get_nni_installation_path(): ...@@ -78,17 +78,17 @@ def get_nni_installation_path():
print_error('Fail to find nni under python library') print_error('Fail to find nni under python library')
exit(1) exit(1)
def start_rest_server(args, platform, mode, config_file_name, experiment_id=None, log_dir=None, log_level=None): def start_rest_server(port, platform, mode, config_file_name, foreground=False, experiment_id=None, log_dir=None, log_level=None):
'''Run nni manager process''' '''Run nni manager process'''
if detect_port(args.port): if detect_port(port):
print_error('Port %s is used by another process, please reset the port!\n' \ print_error('Port %s is used by another process, please reset the port!\n' \
'You could use \'nnictl create --help\' to get help information' % args.port) 'You could use \'nnictl create --help\' to get help information' % port)
exit(1) exit(1)
if (platform != 'local') and detect_port(int(args.port) + 1): if (platform != 'local') and detect_port(int(port) + 1):
print_error('PAI mode need an additional adjacent port %d, and the port %d is used by another process!\n' \ print_error('PAI mode need an additional adjacent port %d, and the port %d is used by another process!\n' \
'You could set another port to start experiment!\n' \ 'You could set another port to start experiment!\n' \
'You could use \'nnictl create --help\' to get help information' % ((int(args.port) + 1), (int(args.port) + 1))) 'You could use \'nnictl create --help\' to get help information' % ((int(port) + 1), (int(port) + 1)))
exit(1) exit(1)
print_normal('Starting restful server...') print_normal('Starting restful server...')
...@@ -99,7 +99,7 @@ def start_rest_server(args, platform, mode, config_file_name, experiment_id=None ...@@ -99,7 +99,7 @@ def start_rest_server(args, platform, mode, config_file_name, experiment_id=None
node_command = 'node' node_command = 'node'
if sys.platform == 'win32': if sys.platform == 'win32':
node_command = os.path.join(entry_dir[:-3], 'Scripts', 'node.exe') node_command = os.path.join(entry_dir[:-3], 'Scripts', 'node.exe')
cmds = [node_command, entry_file, '--port', str(args.port), '--mode', platform] cmds = [node_command, entry_file, '--port', str(port), '--mode', platform]
if mode == 'view': if mode == 'view':
cmds += ['--start_mode', 'resume'] cmds += ['--start_mode', 'resume']
cmds += ['--readonly', 'true'] cmds += ['--readonly', 'true']
...@@ -111,7 +111,7 @@ def start_rest_server(args, platform, mode, config_file_name, experiment_id=None ...@@ -111,7 +111,7 @@ def start_rest_server(args, platform, mode, config_file_name, experiment_id=None
cmds += ['--log_level', log_level] cmds += ['--log_level', log_level]
if mode in ['resume', 'view']: if mode in ['resume', 'view']:
cmds += ['--experiment_id', experiment_id] cmds += ['--experiment_id', experiment_id]
if args.foreground: if foreground:
cmds += ['--foreground', 'true'] cmds += ['--foreground', 'true']
stdout_full_path, stderr_full_path = get_log_path(config_file_name) stdout_full_path, stderr_full_path = get_log_path(config_file_name)
with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file: with open(stdout_full_path, 'a+') as stdout_file, open(stderr_full_path, 'a+') as stderr_file:
...@@ -122,12 +122,12 @@ def start_rest_server(args, platform, mode, config_file_name, experiment_id=None ...@@ -122,12 +122,12 @@ def start_rest_server(args, platform, mode, config_file_name, experiment_id=None
stderr_file.write(log_header) stderr_file.write(log_header)
if sys.platform == 'win32': if sys.platform == 'win32':
from subprocess import CREATE_NEW_PROCESS_GROUP from subprocess import CREATE_NEW_PROCESS_GROUP
if args.foreground: if foreground:
process = Popen(cmds, cwd=entry_dir, stdout=PIPE, stderr=STDOUT, creationflags=CREATE_NEW_PROCESS_GROUP) process = Popen(cmds, cwd=entry_dir, stdout=PIPE, stderr=STDOUT, creationflags=CREATE_NEW_PROCESS_GROUP)
else: else:
process = Popen(cmds, cwd=entry_dir, stdout=stdout_file, stderr=stderr_file, creationflags=CREATE_NEW_PROCESS_GROUP) process = Popen(cmds, cwd=entry_dir, stdout=stdout_file, stderr=stderr_file, creationflags=CREATE_NEW_PROCESS_GROUP)
else: else:
if args.foreground: if foreground:
process = Popen(cmds, cwd=entry_dir, stdout=PIPE, stderr=PIPE) process = Popen(cmds, cwd=entry_dir, stdout=PIPE, stderr=PIPE)
else: else:
process = Popen(cmds, cwd=entry_dir, stdout=stdout_file, stderr=stderr_file) process = Popen(cmds, cwd=entry_dir, stdout=stdout_file, stderr=stderr_file)
...@@ -428,12 +428,14 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen ...@@ -428,12 +428,14 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None log_dir = experiment_config['logDir'] if experiment_config.get('logDir') else None
log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None log_level = experiment_config['logLevel'] if experiment_config.get('logLevel') else None
#view experiment mode do not need debug function, when view an experiment, there will be no new logs created #view experiment mode do not need debug function, when view an experiment, there will be no new logs created
foreground = False
if mode != 'view': if mode != 'view':
foreground = args.foreground
if log_level not in ['trace', 'debug'] and (args.debug or experiment_config.get('debug') is True): if log_level not in ['trace', 'debug'] and (args.debug or experiment_config.get('debug') is True):
log_level = 'debug' log_level = 'debug'
# start rest server # start rest server
rest_process, start_time = start_rest_server(args, experiment_config['trainingServicePlatform'], \ rest_process, start_time = start_rest_server(args.port, experiment_config['trainingServicePlatform'], \
mode, config_file_name, experiment_id, log_dir, log_level) mode, config_file_name, foreground, experiment_id, log_dir, log_level)
nni_config.set_config('restServerPid', rest_process.pid) nni_config.set_config('restServerPid', rest_process.pid)
# Deal with annotation # Deal with annotation
if experiment_config.get('useAnnotation'): if experiment_config.get('useAnnotation'):
...@@ -501,7 +503,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen ...@@ -501,7 +503,7 @@ def launch_experiment(args, experiment_config, mode, config_file_name, experimen
experiment_config['experimentName']) experiment_config['experimentName'])
print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list))) print_normal(EXPERIMENT_SUCCESS_INFO % (experiment_id, ' '.join(web_ui_url_list)))
if args.foreground: if mode != 'view' and args.foreground:
try: try:
while True: while True:
log_content = rest_process.stdout.readline().strip().decode('utf-8') log_content = rest_process.stdout.readline().strip().decode('utf-8')
......
...@@ -63,10 +63,10 @@ def parse_args(): ...@@ -63,10 +63,10 @@ def parse_args():
parser_resume.set_defaults(func=resume_experiment) parser_resume.set_defaults(func=resume_experiment)
# parse view command # parse view command
parser_resume = subparsers.add_parser('view', help='view a stopped experiment') parser_view = subparsers.add_parser('view', help='view a stopped experiment')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to view') parser_view.add_argument('id', nargs='?', help='The id of the experiment you want to view')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_view.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server')
parser_resume.set_defaults(func=view_experiment) parser_view.set_defaults(func=view_experiment)
# parse update command # parse update command
parser_updater = subparsers.add_parser('update', help='update the experiment') parser_updater = subparsers.add_parser('update', help='update the experiment')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment