Unverified Commit d5072a29 authored by SparkSnail's avatar SparkSnail Committed by GitHub
Browse files

Support save and open experiments (#2750)

parent 8961d7a5
......@@ -444,9 +444,6 @@ Debug mode will disable version check function in Trialkeeper.
|--all| False| |delete all of experiments|
<a name="export"></a>
* __nnictl experiment export__
* Description
......@@ -531,6 +528,62 @@ Debug mode will disable version check function in Trialkeeper.
nnictl experiment import [experiment_id] -f experiment_data.json
```
* __nnictl experiment save__
* Description
Save nni experiment metadata and code data.
* Usage
```bash
nnictl experiment save [OPTIONS]
```
* Options
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|id| True| |The id of the experiment you want to save|
|--path, -p| False| |the folder path to store nni experiment data, default current working directory|
|--saveCodeDir, -s| False| |save codeDir data of the experiment, default False|
* Examples
> save an experiment
```bash
nnictl experiment save [experiment_id] --saveCodeDir
```
* __nnictl experiment load__
* Description
Load an nni experiment.
* Usage
```bash
nnictl experiment load [OPTIONS]
```
* Options
|Name, shorthand|Required|Default|Description|
|------|------|------ |------|
|--path, -p| True| |the file path of nni package|
|--codeDir, -c| True| |the path of codeDir for the loaded experiment; the code in the loaded experiment package will also be copied to this path|
|--logDir, -l| False| |the path of logDir for loaded experiment|
* Examples
> load an experiment
```bash
nnictl experiment load --path [path] --codeDir [codeDir]
```
<a name="platform"></a>
### Manage platform information
......
......@@ -4,7 +4,10 @@
import os
import sys
import json
import tempfile
import socket
import string
import random
import ruamel.yaml as yaml
import psutil
from colorama import Fore
......@@ -83,3 +86,12 @@ def check_tensorboard_version():
print_error('import tensorboard error!')
exit(1)
def generate_temp_dir():
    '''Create a fresh, uniquely named temp folder and return its path.

    The folder is created as <system temp dir>/nni/<8 random letters or digits>.
    Creation is attempted directly and retried under a new name when the
    folder already exists, so two processes that pick the same name cannot
    both end up using (or crashing on) the same folder.
    '''
    def generate_folder_name():
        return os.path.join(tempfile.gettempdir(), 'nni', ''.join(random.sample(string.ascii_letters + string.digits, 8)))

    while True:
        temp_dir = generate_folder_name()
        try:
            # makedirs without exist_ok fails if the leaf folder already exists,
            # which makes "check and create" a single atomic step.
            os.makedirs(temp_dir)
        except FileExistsError:
            continue
        return temp_dir
......@@ -54,13 +54,13 @@ class Experiments:
self.experiment_file = os.path.join(NNICTL_HOME_DIR, '.experiment')
self.experiments = self.read_file()
def add_experiment(self, expId, port, time, file_name, platform, experiment_name):
def add_experiment(self, expId, port, startTime, file_name, platform, experiment_name, endTime='N/A', status='INITIALIZED'):
'''set {key:value} pairs to self.experiments'''
self.experiments[expId] = {}
self.experiments[expId]['port'] = port
self.experiments[expId]['startTime'] = time
self.experiments[expId]['endTime'] = 'N/A'
self.experiments[expId]['status'] = 'INITIALIZED'
self.experiments[expId]['startTime'] = startTime
self.experiments[expId]['endTime'] = endTime
self.experiments[expId]['status'] = status
self.experiments[expId]['fileName'] = file_name
self.experiments[expId]['platform'] = platform
self.experiments[expId]['experimentName'] = experiment_name
......
......@@ -6,6 +6,8 @@ from colorama import Fore
# Folder under the user's home where nnictl keeps its own metadata
# (the '.experiment' index file and the per-experiment config folders).
NNICTL_HOME_DIR = os.path.join(os.path.expanduser('~'), '.local', 'nnictl')
# Default root folder for experiment data; used as the logDir root when an
# experiment does not configure its own logDir.
NNI_HOME_DIR = os.path.join(os.path.expanduser('~'), 'nni-experiments')
# Message prefixes by severity -- presumably prepended by the print_error /
# print_normal / print_warning helpers; confirm against common_utils.
ERROR_INFO = 'ERROR: '
NORMAL_INFO = 'INFO: '
WARNING_INFO = 'WARNING: '
......
......@@ -11,7 +11,8 @@ from .updater import update_searchspace, update_concurrency, update_duration, up
from .nnictl_utils import stop_experiment, trial_ls, trial_kill, list_experiment, experiment_status,\
log_trial, experiment_clean, platform_clean, experiment_list, \
monitor_experiment, export_trials_data, trial_codegen, webui_url, \
get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas
get_config, log_stdout, log_stderr, search_space_auto_gen, webui_nas, \
save_experiment, load_experiment
from .package_management import package_install, package_uninstall, package_show, package_list
from .constants import DEFAULT_REST_PORT
from .tensorboard_utils import start_tensorboard, stop_tensorboard
......@@ -129,15 +130,6 @@ def parse_args():
parser_experiment_clean.add_argument('id', nargs='?', help='the id of experiment')
parser_experiment_clean.add_argument('--all', action='store_true', default=False, help='delete all of experiments')
parser_experiment_clean.set_defaults(func=experiment_clean)
#parse experiment command
parser_platform = subparsers.add_parser('platform', help='get platform information')
#add subparsers for parser_experiment
parser_platform_subparsers = parser_platform.add_subparsers()
parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_platform_clean.set_defaults(func=platform_clean)
#import tuning data
parser_import_data = parser_experiment_subparsers.add_parser('import', help='import additional data')
parser_import_data.add_argument('id', nargs='?', help='the id of experiment')
......@@ -149,6 +141,29 @@ def parse_args():
parser_trial_export.add_argument('--type', '-t', choices=['json', 'csv'], required=True, dest='type', help='target file type')
parser_trial_export.add_argument('--filename', '-f', required=True, dest='path', help='target file path')
parser_trial_export.set_defaults(func=export_trials_data)
#save an NNI experiment
parser_save_experiment = parser_experiment_subparsers.add_parser('save', help='save an experiment')
parser_save_experiment.add_argument('id', nargs='?', help='the id of experiment')
parser_save_experiment.add_argument('--path', '-p', required=False, help='the folder path to store nni experiment data, \
default current working directory')
parser_save_experiment.add_argument('--saveCodeDir', '-s', action='store_true', default=False, help='save codeDir data \
of the experiment')
parser_save_experiment.set_defaults(func=save_experiment)
#load an NNI experiment
parser_load_experiment = parser_experiment_subparsers.add_parser('load', help='load an experiment')
parser_load_experiment.add_argument('--path', '-p', required=True, help='the path of nni package file')
parser_load_experiment.add_argument('--codeDir', '-c', required=True, help='the path of codeDir for loaded experiment, \
this path will also put the code in the loaded experiment package')
parser_load_experiment.add_argument('--logDir', '-l', required=False, help='the path of logDir for loaded experiment')
parser_load_experiment.set_defaults(func=load_experiment)
#parse platform command
parser_platform = subparsers.add_parser('platform', help='get platform information')
#add subparsers for parser_platform
parser_platform_subparsers = parser_platform.add_subparsers()
parser_platform_clean = parser_platform_subparsers.add_parser('clean', help='clean up the platform data')
parser_platform_clean.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_platform_clean.set_defaults(func=platform_clean)
#TODO:finish webui function
#parse board command
......
......@@ -18,9 +18,9 @@ from nni_annotation import expand_annotations
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url
from .config_utils import Config, Experiments
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
from .constants import NNICTL_HOME_DIR, NNI_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, generate_temp_dir
from .command_utils import check_output_command, kill_command
from .ssh_utils import create_ssh_sftp_client, remove_remote_directory
......@@ -736,3 +736,165 @@ def search_space_auto_gen(args):
print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path))
else:
print_normal('Generate search space done: \'{}\'.'.format(file_path))
def save_experiment(args):
    '''Save the data of a stopped experiment to a zip file.

    The archive is named ``nni_experiment_<id>.zip`` and is written to
    ``args.path`` (created if needed) or to the current working directory.
    Its layout is:

        experiment/   copy of the experiment's log directory
        nnictl/       the '.experiment' metadata record plus the nnictl
                      config folder of the experiment
        code/         copy of the trial codeDir (only with ``args.saveCodeDir``)

    Parameters
    ----------
    args : argparse.Namespace
        Needs the attributes ``id``, ``path`` and ``saveCodeDir``.

    Prints an error and exits with status 1 when the id is missing or unknown,
    the experiment is not in STOPPED status, the log directory does not exist,
    or the metadata file cannot be written.
    '''
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if args.id is None:
        print_error('Please set experiment id.')
        exit(1)
    if args.id not in experiment_dict:
        print_error('Cannot find experiment {0}.'.format(args.id))
        exit(1)
    if experiment_dict[args.id].get('status') != 'STOPPED':
        print_error('Can only save stopped experiment!')
        exit(1)
    print_normal('Saving...')
    experiment_record = experiment_dict[args.id]
    config_file_name = experiment_record['fileName']
    nni_config = Config(config_file_name)
    logDir = os.path.join(NNI_HOME_DIR, args.id)
    if nni_config.get_config('logDir'):
        logDir = os.path.join(nni_config.get_config('logDir'), args.id)
    # Validate before creating the temp folder so this early exit leaves nothing behind.
    if not os.path.exists(logDir):
        print_error('logDir: %s does not exist!' % logDir)
        exit(1)
    temp_root_dir = generate_temp_dir()
    try:
        # Step1. Copy logDir to temp folder
        shutil.copytree(logDir, os.path.join(temp_root_dir, 'experiment'))
        # Step2. Copy nnictl metadata to temp folder
        temp_nnictl_dir = os.path.join(temp_root_dir, 'nnictl')
        os.makedirs(temp_nnictl_dir, exist_ok=True)
        metadata_path = os.path.join(temp_nnictl_dir, '.experiment')
        # Work on a copy: the 'id' key belongs to the archive only, not to the
        # in-memory experiment index.
        metadata = dict(experiment_record, id=args.id)
        try:
            with open(metadata_path, 'w') as file:
                json.dump(metadata, file)
        except IOError:
            print_error('Write file to %s failed!' % metadata_path)
            exit(1)
        shutil.copytree(os.path.join(NNICTL_HOME_DIR, config_file_name),
                        os.path.join(temp_nnictl_dir, config_file_name))
        # Step3. Copy code dir
        if args.saveCodeDir:
            code_dir = nni_config.get_config('experimentConfig')['trial']['codeDir']
            shutil.copytree(code_dir, os.path.join(temp_root_dir, 'code'))
        # Step4. Archive folder
        zip_package_name = 'nni_experiment_%s' % args.id
        if args.path:
            target_dir = os.path.expanduser(args.path)
            os.makedirs(target_dir, exist_ok=True)
            zip_package_name = os.path.join(target_dir, zip_package_name)
        shutil.make_archive(zip_package_name, 'zip', temp_root_dir)
        print_normal('Save to %s.zip success!' % zip_package_name)
    finally:
        # Step5. Cleanup temp data, also when a step above failed or exited
        shutil.rmtree(temp_root_dir, ignore_errors=True)
def load_experiment(args):
    '''Load an experiment from an archive created by ``nnictl experiment save``.

    Steps: validate the user-supplied paths and the archive content, copy the
    nnictl config folder into NNICTL_HOME_DIR, copy the experiment data into
    ``<logDir>/<experiment id>``, copy archived code (if any) into codeDir
    without overwriting existing entries, and register the experiment in the
    nnictl experiment index.

    Parameters
    ----------
    args : argparse.Namespace
        Needs the attributes ``path`` (archive file), ``codeDir`` (existing
        folder that becomes the trial codeDir) and ``logDir`` (optional
        existing folder that becomes the experiment's logDir).

    Prints an error and exits with status 1 on any validation failure. The
    temp folder used for unpacking is removed on every exit path.
    '''
    package_path = os.path.expanduser(args.path)
    # Check the expanded path: it is the one that gets unpacked below.
    if not os.path.exists(package_path):
        print_error('file path %s does not exist!' % args.path)
        exit(1)
    # Validate user-supplied folders before unpacking, so that these early
    # exits do not leave an unpacked archive behind.
    codeDir = os.path.expanduser(args.codeDir)
    if not os.path.exists(codeDir):
        print_error('Invalid: codeDir path does not exist!')
        exit(1)
    user_log_dir = None
    if args.logDir:
        # Stored in the experiment config, so keep it independent of the cwd.
        user_log_dir = os.path.abspath(os.path.expanduser(args.logDir))
        if not os.path.exists(user_log_dir):
            print_error('Invalid: logDir path does not exist!')
            exit(1)
    temp_root_dir = generate_temp_dir()
    try:
        # NOTE(review): the archive is external input; unpack_archive does not
        # guarantee that tar members stay inside temp_root_dir on older Pythons.
        try:
            shutil.unpack_archive(package_path, temp_root_dir)
        except (shutil.ReadError, ValueError) as err:
            print_error('Invalid archive file: %s' % err)
            exit(1)
        print_normal('Loading...')
        # Step1. Validation
        experiment_temp_dir = os.path.join(temp_root_dir, 'experiment')
        if not os.path.exists(os.path.join(experiment_temp_dir, 'db')):
            print_error('Invalid archive file: db file does not exist!')
            exit(1)
        nnictl_temp_dir = os.path.join(temp_root_dir, 'nnictl')
        metadata_path = os.path.join(nnictl_temp_dir, '.experiment')
        if not os.path.exists(metadata_path):
            print_error('Invalid archive file: nnictl metadata file does not exist!')
            exit(1)
        try:
            with open(metadata_path, 'r') as file:
                experiment_metadata = json.load(file)
        except (ValueError, IOError) as err:
            print_error('Invalid nnictl metadata file: %s' % err)
            exit(1)
        if not isinstance(experiment_metadata, dict):
            print_error('Invalid nnictl metadata file: a json object is expected!')
            exit(1)
        # 'id' and 'fileName' become path components of folders that are
        # removed and rewritten below, so they must be plain names.
        for field in ('id', 'fileName'):
            value = experiment_metadata.get(field)
            if not isinstance(value, str) or value in ('', '.', '..') or os.path.basename(value) != value:
                print_error('Invalid nnictl metadata file: bad value of %s!' % field)
                exit(1)
        experiment_id = experiment_metadata['id']
        config_file_name = experiment_metadata['fileName']
        experiment_config = Experiments()
        experiment_dict = experiment_config.get_all_experiments()
        if experiment_id in experiment_dict:
            print_error('Invalid: experiment id already exist!')
            exit(1)
        config_src_path = os.path.join(nnictl_temp_dir, config_file_name)
        if not os.path.exists(config_src_path):
            print_error('Invalid: experiment metadata does not exist!')
            exit(1)
        # Step2. Copy nnictl metadata
        config_dest_path = os.path.join(NNICTL_HOME_DIR, config_file_name)
        if os.path.exists(config_dest_path):
            shutil.rmtree(config_dest_path)
        shutil.copytree(config_src_path, config_dest_path)
        # Step3. Copy experiment data
        nni_config = Config(config_file_name)
        nnictl_exp_config = nni_config.get_config('experimentConfig')
        if user_log_dir:
            logDir = user_log_dir
            nnictl_exp_config['logDir'] = logDir
        else:
            logDir = nnictl_exp_config.get('logDir') or NNI_HOME_DIR
        experiment_dest_path = os.path.join(logDir, experiment_id)
        if os.path.exists(experiment_dest_path):
            shutil.rmtree(experiment_dest_path)
        shutil.copytree(experiment_temp_dir, experiment_dest_path)
        # Step4. Copy code dir
        if not os.path.isabs(codeDir):
            codeDir = os.path.join(os.getcwd(), codeDir)
            print_normal('Expand codeDir to %s' % codeDir)
        nnictl_exp_config['trial']['codeDir'] = codeDir
        archive_code_dir = os.path.join(temp_root_dir, 'code')
        if os.path.exists(archive_code_dir):
            for file_name in os.listdir(archive_code_dir):
                src_path = os.path.join(archive_code_dir, file_name)
                target_path = os.path.join(codeDir, file_name)
                # Never overwrite what is already in the user's codeDir.
                if os.path.exists(target_path):
                    print_error('Copy %s failed, %s exist!' % (file_name, target_path))
                    continue
                if os.path.isdir(src_path):
                    shutil.copytree(src_path, target_path)
                else:
                    shutil.copy(src_path, target_path)
        # Step5. Create experiment metadata
        nni_config.set_config('experimentConfig', nnictl_exp_config)
        experiment_config.add_experiment(experiment_id,
                                         experiment_metadata.get('port'),
                                         experiment_metadata.get('startTime'),
                                         config_file_name,
                                         experiment_metadata.get('platform'),
                                         experiment_metadata.get('experimentName'),
                                         experiment_metadata.get('endTime'),
                                         experiment_metadata.get('status'))
        print_normal('Load experiment %s success!' % experiment_id)
    finally:
        # Step6. Cleanup temp data, also when a step above failed or exited
        shutil.rmtree(temp_root_dir, ignore_errors=True)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment