Unverified Commit d5072a29 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Support save and open experiments (#2750)

parent 8961d7a5
...@@ -444,9 +444,6 @@ Debug mode will disable version check function in Trialkeeper. ...@@ -444,9 +444,6 @@ Debug mode will disable version check function in Trialkeeper.
|--all| False| |delete all of experiments| |--all| False| |delete all of experiments|
<a name="export"></a>
* __nnictl experiment export__ * __nnictl experiment export__
* Description * Description
...@@ -531,6 +528,62 @@ Debug mode will disable version check function in Trialkeeper. ...@@ -531,6 +528,62 @@ Debug mode will disable version check function in Trialkeeper.
nnictl experiment import [experiment_id] -f experiment_data.json nnictl experiment import [experiment_id] -f experiment_data.json
``` ```
* __nnictl experiment save__
* Description
Save nni experiment metadata and code data.
* Usage
```bash
nnictl experiment save [OPTIONS]
```
* Options
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|id| True| |The id of the experiment you want to save|
|--path, -p| False| |the folder path to store nni experiment data, default current working directory|
|--saveCodeDir, -s| False| |save codeDir data of the experiment, default False|
* Examples
> save an experiment
```bash
nnictl experiment save [experiment_id] --saveCodeDir
```
* __nnictl experiment load__
* Description
Load an nni experiment.
* Usage
```bash
nnictl experiment load [OPTIONS]
```
* Options
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|--path, -p| True| |the file path of nni package|
|--codeDir, -c| True| |the path of codeDir for the loaded experiment; any code saved in the experiment package will also be copied into this path|
|--logDir, -l| False| |the path of logDir for loaded experiment|
* Examples
> load an experiment
```bash
nnictl experiment load --path [path] --codeDir [codeDir]
```
<a name="platform"></a> <a name="platform"></a>
### Manage platform information ### Manage platform information
......
...@@ -4,7 +4,10 @@ ...@@ -4,7 +4,10 @@
import os import os
import sys import sys
import json import json
import tempfile
import socket import socket
import string
import random
import ruamel.yaml as yaml import ruamel.yaml as yaml
import psutil import psutil
from colorama import Fore from colorama import Fore
...@@ -83,3 +86,12 @@ def check_tensorboard_version(): ...@@ -83,3 +86,12 @@ def check_tensorboard_version():
print_error('import tensorboard error!') print_error('import tensorboard error!')
exit(1) exit(1)
def generate_temp_dir():
    '''Create a new, uniquely named folder under <system temp dir>/nni.

    Returns the absolute path of the freshly created (empty) folder. The
    caller owns the folder and is responsible for removing it.
    '''
    temp_root = os.path.join(tempfile.gettempdir(), 'nni')
    os.makedirs(temp_root, exist_ok=True)
    # mkdtemp chooses the name and creates the folder in one atomic step, so
    # two concurrent nnictl processes can never end up with the same folder
    # (a separate "exists?" check followed by makedirs can race).
    return tempfile.mkdtemp(prefix='', dir=temp_root)
...@@ -54,13 +54,13 @@ class Experiments: ...@@ -54,13 +54,13 @@ class Experiments:
self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment') self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment')
self.experiments = self.read_file() self.experiments = self.read_file()
def add_experiment(self, expId, port, time, file_name, platform, experiment_name): def add_experiment(self, expId, port, startTime, file_name, platform, experiment_name, endTime='N/A', status='INITIALIZED'):
'''set {key:value} paris to self.experiment''' '''set {key:value} paris to self.experiment'''
self.experiments[expId] = {} self.experiments[expId] = {}
self.experiments[expId]['port'] = port self.experiments[expId]['port'] = port
self.experiments[expId]['startTime'] = time self.experiments[expId]['startTime'] = startTime
self.experiments[expId]['endTime'] = 'N/A' self.experiments[expId]['endTime'] = endTime
self.experiments[expId]['status'] = 'INITIALIZED' self.experiments[expId]['status'] = status
self.experiments[expId]['fileName'] = file_name self.experiments[expId]['fileName'] = file_name
self.experiments[expId]['platform'] = platform self.experiments[expId]['platform'] = platform
self.experiments[expId]['experimentName'] = experiment_name self.experiments[expId]['experimentName'] = experiment_name
......
...@@ -6,6 +6,8 @@ from colorama import Fore ...@@ -6,6 +6,8 @@ from colorama import Fore
NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl') NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl')
NNI_HOME_DIR = os.path.join(os.path.expanduser('~'), 'nni-experiments')
ERROR_INFO = 'ERROR: ' ERROR_INFO = 'ERROR: '
NORMAL_INFO = 'INFO: ' NORMAL_INFO = 'INFO: '
WARNING_INFO = 'WARNING: ' WARNING_INFO = 'WARNING: '
......
...@@ -11,7 +11,8 @@ from .updater import update_searchspace, update_concurrency, update_duration, up ...@@ -11,7 +11,8 @@ from .updater import update_searchspace, update_concurrency, update_duration, up
from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\ from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\
log_trial, experiment_clean, platform_clean, experiment_list, \ log_trial, experiment_clean, platform_clean, experiment_list, \
monitor_experiment, export_trials_data, trial_codegen, webui_url, \ monitor_experiment, export_trials_data, trial_codegen, webui_url, \
get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas, \
save_experiment, load_experiment
from .package_management import package_install, package_uninstall, package_show, package_list from .package_management import package_install, package_uninstall, package_show, package_list
from .constants import DEFAULT_REST_PORT from .constants import DEFAULT_REST_PORT
from .tensorboard_utils import start_tensorboard, stop_tensorboard from .tensorboard_utils import start_tensorboard, stop_tensorboard
...@@ -129,15 +130,6 @@ def parse_args(): ...@@ -129,15 +130,6 @@ def parse_args():
parser_experiment_clean.add_argument('id', nargs='?', help='the id of experiment') parser_experiment_clean.add_argument('id', nargs='?', help='the id of experiment')
parser_experiment_clean.add_argument('--all', action='store_true', default=False, help='delete all of experiments') parser_experiment_clean.add_argument('--all', action='store_true', default=False, help='delete all of experiments')
parser_experiment_clean.set_defaults(func=experiment_clean) parser_experiment_clean.set_defaults(func=experiment_clean)
#parse experiment command
parser_platform = subparsers.add_parser('platform', help='get platform information')
#add subparsers for parser_experiment
parser_platform_subparsers = parser_platform.add_subparsers()
parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_platform_clean.set_defaults(func=platform_clean)
#import tuning data #import tuning data
parser_import_data = parser_experiment_subparsers.add_parser('import', help='import additional data') parser_import_data = parser_experiment_subparsers.add_parser('import', help='import additional data')
parser_import_data.add_argument('id', nargs='?', help='the id of experiment') parser_import_data.add_argument('id', nargs='?', help='the id of experiment')
...@@ -149,6 +141,29 @@ def parse_args(): ...@@ -149,6 +141,29 @@ def parse_args():
parser_trial_export.add_argument('--type', '-t', choices=['json', 'csv'], required=True, dest='type', help='target file type') parser_trial_export.add_argument('--type', '-t', choices=['json', 'csv'], required=True, dest='type', help='target file type')
parser_trial_export.add_argument('--filename', '-f', required=True, dest='path', help='target file path') parser_trial_export.add_argument('--filename', '-f', required=True, dest='path', help='target file path')
parser_trial_export.set_defaults(func=export_trials_data) parser_trial_export.set_defaults(func=export_trials_data)
#save an NNI experiment
parser_save_experiment = parser_experiment_subparsers.add_parser('save', help='save an experiment')
parser_save_experiment.add_argument('id', nargs='?', help='the id of experiment')
parser_save_experiment.add_argument('--path', '-p', required=False, help='the folder path to store nni experiment data, \
default current working directory')
parser_save_experiment.add_argument('--saveCodeDir', '-s', action='store_true', default=False, help='save codeDir data \
of the experiment')
parser_save_experiment.set_defaults(func=save_experiment)
#load an NNI experiment
parser_load_experiment = parser_experiment_subparsers.add_parser('load', help='load an experiment')
parser_load_experiment.add_argument('--path', '-p', required=True, help='the path of nni package file')
parser_load_experiment.add_argument('--codeDir', '-c', required=True, help='the path of codeDir for loaded experiment, \
this path will also put the code in the loaded experiment package')
parser_load_experiment.add_argument('--logDir', '-l', required=False, help='the path of logDir for loaded experiment')
parser_load_experiment.set_defaults(func=load_experiment)
#parse platform command
parser_platform = subparsers.add_parser('platform', help='get platform information')
#add subparsers for parser_platform
parser_platform_subparsers = parser_platform.add_subparsers()
parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_platform_clean.set_defaults(func=platform_clean)
#TODO:finish webui function #TODO:finish webui function
#parse board command #parse board command
......
...@@ -18,9 +18,9 @@ from nni_annotation import expand_annotations ...@@ -18,9 +18,9 @@ from nni_annotation import expand_annotations
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url
from .config_utils import Config, Experiments from .config_utils import Config, Experiments
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ from .constants import NNICTL_HOME_DIR, NNI_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, generate_temp_dir
from .command_utils import check_output_command, kill_command from .command_utils import check_output_command, kill_command
from .ssh_utils import create_ssh_sftp_client, remove_remote_directory from .ssh_utils import create_ssh_sftp_client, remove_remote_directory
...@@ -736,3 +736,165 @@ def search_space_auto_gen(args): ...@@ -736,3 +736,165 @@ def search_space_auto_gen(args):
print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path)) print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path))
else: else:
print_normal('Generate search space done: \'{}\'.'.format(file_path)) print_normal('Generate search space done: \'{}\'.'.format(file_path))
def save_experiment(args):
    '''Save a stopped experiment to a zip file named nni_experiment_<id>.zip.

    The archive contains the experiment log folder ('experiment'), the nnictl
    metadata ('nnictl') and, when args.saveCodeDir is set, the trial code
    folder ('code').

    args.id          -- id of the experiment to save (required)
    args.path        -- optional folder to put the zip file in; the current
                        working directory is used when it is not set
    args.saveCodeDir -- also archive the trial codeDir when True

    Exits with status 1 when the id is missing or unknown, the experiment is
    not in STOPPED status, the log folder does not exist, or the metadata
    file cannot be written.
    '''
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if args.id is None:
        print_error('Please set experiment id.')
        exit(1)
    if args.id not in experiment_dict:
        print_error('Cannot find experiment {0}.'.format(args.id))
        exit(1)
    if experiment_dict[args.id].get('status') != 'STOPPED':
        print_error('Can only save stopped experiment!')
        exit(1)
    print_normal('Saving...')
    file_name = experiment_dict[args.id]['fileName']
    nni_config = Config(file_name)
    logDir = os.path.join(NNI_HOME_DIR, args.id)
    if nni_config.get_config('logDir'):
        logDir = os.path.join(nni_config.get_config('logDir'), args.id)
    # Validate before creating the temp folder so this failure leaves nothing behind
    if not os.path.exists(logDir):
        print_error('logDir: %s does not exist!' % logDir)
        exit(1)
    temp_root_dir = generate_temp_dir()
    try:
        # Step1. Copy logDir to temp folder
        temp_experiment_dir = os.path.join(temp_root_dir, 'experiment')
        shutil.copytree(logDir, temp_experiment_dir)
        # Step2. Copy nnictl metadata to temp folder
        temp_nnictl_dir = os.path.join(temp_root_dir, 'nnictl')
        os.makedirs(temp_nnictl_dir, exist_ok=True)
        metadata_path = os.path.join(temp_nnictl_dir, '.experiment')
        # Add the id to a copy, so the cached experiment entry is not modified
        experiment_metadata = dict(experiment_dict[args.id], id=args.id)
        try:
            with open(metadata_path, 'w') as file:
                json.dump(experiment_metadata, file)
        except IOError:
            print_error('Write file to %s failed!' % metadata_path)
            exit(1)
        nnictl_config_dir = os.path.join(NNICTL_HOME_DIR, file_name)
        shutil.copytree(nnictl_config_dir, os.path.join(temp_nnictl_dir, file_name))
        # Step3. Copy code dir
        if args.saveCodeDir:
            temp_code_dir = os.path.join(temp_root_dir, 'code')
            shutil.copytree(nni_config.get_config('experimentConfig')['trial']['codeDir'], temp_code_dir)
        # Step4. Archive folder
        zip_package_name = 'nni_experiment_%s' % args.id
        if args.path:
            # Expand '~' the same way load_experiment does for its --path
            target_dir = os.path.expanduser(args.path)
            os.makedirs(target_dir, exist_ok=True)
            zip_package_name = os.path.join(target_dir, zip_package_name)
        shutil.make_archive(zip_package_name, 'zip', temp_root_dir)
        print_normal('Save to %s.zip success!' % zip_package_name)
    finally:
        # Step5. Cleanup temp data, also on exit(1) and on unexpected errors
        shutil.rmtree(temp_root_dir, ignore_errors=True)
def load_experiment(args):
    '''Load an experiment from a package created by `nnictl experiment save`.

    args.path    -- path of the experiment package file (required)
    args.codeDir -- existing folder used as the trial codeDir of the loaded
                    experiment; code stored in the package is copied into it,
                    files that already exist there are skipped
    args.logDir  -- optional existing folder to place the experiment data in;
                    otherwise the logDir of the saved experiment config, or
                    NNI_HOME_DIR, is used

    Exits with status 1 when a given path does not exist, the package is not
    a readable archive or misses required content, or an experiment with the
    same id is already registered.
    '''
    package_path = os.path.expanduser(args.path)
    # Check the expanded path: it is the one that is actually unpacked below
    if not os.path.exists(package_path):
        print_error('file path %s does not exist!' % args.path)
        exit(1)
    # Step1. Validation
    # Command line paths are validated before anything is unpacked, so a bad
    # argument does not leave a temp folder behind.
    codeDir = os.path.expanduser(args.codeDir)
    if not os.path.exists(codeDir):
        print_error('Invalid: codeDir path does not exist!')
        exit(1)
    if args.logDir:
        if not os.path.exists(args.logDir):
            print_error('Invalid: logDir path does not exist!')
            exit(1)
    temp_root_dir = generate_temp_dir()
    try:
        try:
            shutil.unpack_archive(package_path, temp_root_dir)
        except (shutil.ReadError, ValueError) as err:
            print_error('Invalid archive file: %s' % err)
            exit(1)
        print_normal('Loading...')
        experiment_temp_dir = os.path.join(temp_root_dir, 'experiment')
        if not os.path.exists(os.path.join(experiment_temp_dir, 'db')):
            print_error('Invalid archive file: db file does not exist!')
            exit(1)
        nnictl_temp_dir = os.path.join(temp_root_dir, 'nnictl')
        if not os.path.exists(os.path.join(nnictl_temp_dir, '.experiment')):
            print_error('Invalid archive file: nnictl metadata file does not exist!')
            exit(1)
        try:
            with open(os.path.join(nnictl_temp_dir, '.experiment'), 'r') as file:
                experiment_metadata = json.load(file)
        except ValueError as err:
            print_error('Invalid nnictl metadata file: %s' % err)
            exit(1)
        # Both fields are used to build paths below; os.path.join would raise
        # TypeError on a missing (None) value.
        if not isinstance(experiment_metadata, dict) or \
           not experiment_metadata.get('id') or not experiment_metadata.get('fileName'):
            print_error('Invalid nnictl metadata file: id or fileName is missing!')
            exit(1)
        experiment_id = experiment_metadata.get('id')
        file_name = experiment_metadata.get('fileName')
        experiment_config = Experiments()
        experiment_dict = experiment_config.get_all_experiments()
        if experiment_id in experiment_dict:
            print_error('Invalid: experiment id already exist!')
            exit(1)
        if not os.path.exists(os.path.join(nnictl_temp_dir, file_name)):
            print_error('Invalid: experiment metadata does not exist!')
            exit(1)
        # Step2. Copy nnictl metadata
        src_path = os.path.join(nnictl_temp_dir, file_name)
        dest_path = os.path.join(NNICTL_HOME_DIR, file_name)
        if os.path.exists(dest_path):
            shutil.rmtree(dest_path)
        shutil.copytree(src_path, dest_path)
        # Step3. Copy experiment data
        nni_config = Config(file_name)
        nnictl_exp_config = nni_config.get_config('experimentConfig')
        if args.logDir:
            logDir = args.logDir
            nnictl_exp_config['logDir'] = logDir
        else:
            if nnictl_exp_config.get('logDir'):
                logDir = nnictl_exp_config['logDir']
            else:
                logDir = NNI_HOME_DIR
        # The experiment data folder must be named after the experiment id
        os.rename(os.path.join(temp_root_dir, 'experiment'), os.path.join(temp_root_dir, experiment_id))
        src_path = os.path.join(temp_root_dir, experiment_id)
        dest_path = os.path.join(logDir, experiment_id)
        if os.path.exists(dest_path):
            shutil.rmtree(dest_path)
        shutil.copytree(src_path, dest_path)
        # Step4. Copy code dir
        if not os.path.isabs(codeDir):
            codeDir = os.path.join(os.getcwd(), codeDir)
            print_normal('Expand codeDir to %s' % codeDir)
        nnictl_exp_config['trial']['codeDir'] = codeDir
        archive_code_dir = os.path.join(temp_root_dir, 'code')
        if os.path.exists(archive_code_dir):
            for entry_name in os.listdir(archive_code_dir):
                src_path = os.path.join(archive_code_dir, entry_name)
                target_path = os.path.join(codeDir, entry_name)
                # Never overwrite what is already in the user's codeDir
                if os.path.exists(target_path):
                    print_error('Copy %s failed, %s exist!' % (entry_name, target_path))
                    continue
                if os.path.isdir(src_path):
                    shutil.copytree(src_path, target_path)
                else:
                    shutil.copy(src_path, target_path)
        # Step5. Create experiment metadata
        nni_config.set_config('experimentConfig', nnictl_exp_config)
        experiment_config.add_experiment(experiment_id,
                                         experiment_metadata.get('port'),
                                         experiment_metadata.get('startTime'),
                                         file_name,
                                         experiment_metadata.get('platform'),
                                         experiment_metadata.get('experimentName'),
                                         experiment_metadata.get('endTime'),
                                         experiment_metadata.get('status'))
        print_normal('Load experiment %s success!' % experiment_id)
    finally:
        # Step6. Cleanup temp data, also on exit(1) and on unexpected errors
        shutil.rmtree(temp_root_dir, ignore_errors=True)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment