Unverified commit 3ec26b40, authored by liuzhe-lz and committed by GitHub
Browse files

Merge master into dev-retiarii (#3178)

parent d165905d
...@@ -6,6 +6,8 @@ import os ...@@ -6,6 +6,8 @@ import os
import threading import threading
from enum import Enum from enum import Enum
_logger = logging.getLogger(__name__)
class CommandType(Enum): class CommandType(Enum):
# in # in
...@@ -32,7 +34,7 @@ try: ...@@ -32,7 +34,7 @@ try:
_in_file = open(3, 'rb') _in_file = open(3, 'rb')
_out_file = open(4, 'wb') _out_file = open(4, 'wb')
except OSError: except OSError:
pass _logger.debug('IPC pipeline not exists')
def send(command, data): def send(command, data):
...@@ -45,7 +47,7 @@ def send(command, data): ...@@ -45,7 +47,7 @@ def send(command, data):
_lock.acquire() _lock.acquire()
data = data.encode('utf8') data = data.encode('utf8')
msg = b'%b%014d%b' % (command.value, len(data), data) msg = b'%b%014d%b' % (command.value, len(data), data)
logging.getLogger(__name__).debug('Sending command, data: [%s]', msg) _logger.debug('Sending command, data: [%s]', msg)
_out_file.write(msg) _out_file.write(msg)
_out_file.flush() _out_file.flush()
finally: finally:
...@@ -57,14 +59,14 @@ def receive(): ...@@ -57,14 +59,14 @@ def receive():
Returns a tuple of command (CommandType) and payload (str) Returns a tuple of command (CommandType) and payload (str)
""" """
header = _in_file.read(16) header = _in_file.read(16)
logging.getLogger(__name__).debug('Received command, header: [%s]', header) _logger.debug('Received command, header: [%s]', header)
if header is None or len(header) < 16: if header is None or len(header) < 16:
# Pipe EOF encountered # Pipe EOF encountered
logging.getLogger(__name__).debug('Pipe EOF encountered') _logger.debug('Pipe EOF encountered')
return None, None return None, None
length = int(header[2:]) length = int(header[2:])
data = _in_file.read(length) data = _in_file.read(length)
command = CommandType(header[:2]) command = CommandType(header[:2])
data = data.decode('utf8') data = data.decode('utf8')
logging.getLogger(__name__).debug('Received command, data: [%s]', data) _logger.debug('Received command, data: [%s]', data)
return command, data return command, data
...@@ -85,7 +85,6 @@ def start_rest_server(port, platform, mode, experiment_id, foreground=False, log ...@@ -85,7 +85,6 @@ def start_rest_server(port, platform, mode, experiment_id, foreground=False, log
log_header = LOG_HEADER % str(time_now) log_header = LOG_HEADER % str(time_now)
stdout_file.write(log_header) stdout_file.write(log_header)
stderr_file.write(log_header) stderr_file.write(log_header)
print('## [nnictl] cmds:', cmds)
if sys.platform == 'win32': if sys.platform == 'win32':
from subprocess import CREATE_NEW_PROCESS_GROUP from subprocess import CREATE_NEW_PROCESS_GROUP
if foreground: if foreground:
...@@ -388,8 +387,6 @@ def set_experiment(experiment_config, mode, port, config_file_name): ...@@ -388,8 +387,6 @@ def set_experiment(experiment_config, mode, port, config_file_name):
{'key': 'aml_config', 'value': experiment_config['amlConfig']}) {'key': 'aml_config', 'value': experiment_config['amlConfig']})
request_data['clusterMetaData'].append( request_data['clusterMetaData'].append(
{'key': 'trial_config', 'value': experiment_config['trial']}) {'key': 'trial_config', 'value': experiment_config['trial']})
print('## experiment config:')
print(request_data)
response = rest_post(experiment_url(port), json.dumps(request_data), REST_TIME_OUT, show_error=True) response = rest_post(experiment_url(port), json.dumps(request_data), REST_TIME_OUT, show_error=True)
if check_response(response): if check_response(response):
return response return response
......
...@@ -63,14 +63,16 @@ def parse_path(experiment_config, config_path): ...@@ -63,14 +63,16 @@ def parse_path(experiment_config, config_path):
if experiment_config['trial'].get('paiConfigPath'): if experiment_config['trial'].get('paiConfigPath'):
expand_path(experiment_config['trial'], 'paiConfigPath') expand_path(experiment_config['trial'], 'paiConfigPath')
#if users use relative path, convert it to absolute path # If users use relative path, convert it to absolute path.
root_path = os.path.dirname(config_path) root_path = os.path.dirname(config_path)
if experiment_config.get('searchSpacePath'): if experiment_config.get('searchSpacePath'):
parse_relative_path(root_path, experiment_config, 'searchSpacePath') parse_relative_path(root_path, experiment_config, 'searchSpacePath')
if experiment_config.get('logDir'): if experiment_config.get('logDir'):
parse_relative_path(root_path, experiment_config, 'logDir') parse_relative_path(root_path, experiment_config, 'logDir')
if experiment_config.get('trial'): if experiment_config.get('trial'):
parse_relative_path(root_path, experiment_config['trial'], 'codeDir') # In AdaptDL mode, 'codeDir' shouldn't be parsed because it points to the path in the container.
if experiment_config.get('trainingServicePlatform') != 'adl':
parse_relative_path(root_path, experiment_config['trial'], 'codeDir')
if experiment_config['trial'].get('authFile'): if experiment_config['trial'].get('authFile'):
parse_relative_path(root_path, experiment_config['trial'], 'authFile') parse_relative_path(root_path, experiment_config['trial'], 'authFile')
if experiment_config['trial'].get('ps'): if experiment_config['trial'].get('ps'):
......
...@@ -50,7 +50,7 @@ def parse_args(): ...@@ -50,7 +50,7 @@ def parse_args():
# parse start command # parse start command
parser_start = subparsers.add_parser('create', help='create a new experiment') parser_start = subparsers.add_parser('create', help='create a new experiment')
parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file') parser_start.add_argument('--config', '-c', required=True, dest='config', help='the path of yaml config file')
parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_start.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', type=int, help='the port of restful server')
parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode') parser_start.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_start.add_argument('--foreground', '-f', action='store_true', help=' set foreground mode, print log content to terminal') parser_start.add_argument('--foreground', '-f', action='store_true', help=' set foreground mode, print log content to terminal')
parser_start.set_defaults(func=create_experiment) parser_start.set_defaults(func=create_experiment)
...@@ -58,7 +58,7 @@ def parse_args(): ...@@ -58,7 +58,7 @@ def parse_args():
# parse resume command # parse resume command
parser_resume = subparsers.add_parser('resume', help='resume a new experiment') parser_resume = subparsers.add_parser('resume', help='resume a new experiment')
parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume') parser_resume.add_argument('id', nargs='?', help='The id of the experiment you want to resume')
parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_resume.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', type=int, help='the port of restful server')
parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode') parser_resume.add_argument('--debug', '-d', action='store_true', help=' set debug mode')
parser_resume.add_argument('--foreground', '-f', action='store_true', help=' set foreground mode, print log content to terminal') parser_resume.add_argument('--foreground', '-f', action='store_true', help=' set foreground mode, print log content to terminal')
parser_resume.set_defaults(func=resume_experiment) parser_resume.set_defaults(func=resume_experiment)
...@@ -66,7 +66,7 @@ def parse_args(): ...@@ -66,7 +66,7 @@ def parse_args():
# parse view command # parse view command
parser_view = subparsers.add_parser('view', help='view a stopped experiment') parser_view = subparsers.add_parser('view', help='view a stopped experiment')
parser_view.add_argument('id', nargs='?', help='The id of the experiment you want to view') parser_view.add_argument('id', nargs='?', help='The id of the experiment you want to view')
parser_view.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', help='the port of restful server') parser_view.add_argument('--port', '-p', default=DEFAULT_REST_PORT, dest='port', type=int, help='the port of restful server')
parser_view.set_defaults(func=view_experiment) parser_view.set_defaults(func=view_experiment)
# parse update command # parse update command
...@@ -93,7 +93,7 @@ def parse_args(): ...@@ -93,7 +93,7 @@ def parse_args():
#parse stop command #parse stop command
parser_stop = subparsers.add_parser('stop', help='stop the experiment') parser_stop = subparsers.add_parser('stop', help='stop the experiment')
parser_stop.add_argument('id', nargs='?', help='the id of experiment, use \'all\' to stop all running experiments') parser_stop.add_argument('id', nargs='?', help='the id of experiment, use \'all\' to stop all running experiments')
parser_stop.add_argument('--port', '-p', dest='port', help='the port of restful server') parser_stop.add_argument('--port', '-p', dest='port', type=int, help='the port of restful server')
parser_stop.add_argument('--all', '-a', action='store_true', help='stop all of experiments') parser_stop.add_argument('--all', '-a', action='store_true', help='stop all of experiments')
parser_stop.set_defaults(func=stop_experiment) parser_stop.set_defaults(func=stop_experiment)
...@@ -239,7 +239,7 @@ def parse_args(): ...@@ -239,7 +239,7 @@ def parse_args():
parser_tensorboard_start = parser_tensorboard_subparsers.add_parser('start', help='start tensorboard') parser_tensorboard_start = parser_tensorboard_subparsers.add_parser('start', help='start tensorboard')
parser_tensorboard_start.add_argument('id', nargs='?', help='the id of experiment') parser_tensorboard_start.add_argument('id', nargs='?', help='the id of experiment')
parser_tensorboard_start.add_argument('--trial_id', '-T', dest='trial_id', help='the id of trial') parser_tensorboard_start.add_argument('--trial_id', '-T', dest='trial_id', help='the id of trial')
parser_tensorboard_start.add_argument('--port', dest='port', default=6006, help='the port to start tensorboard') parser_tensorboard_start.add_argument('--port', dest='port', default=6006, type=int, help='the port to start tensorboard')
parser_tensorboard_start.set_defaults(func=start_tensorboard) parser_tensorboard_start.set_defaults(func=start_tensorboard)
parser_tensorboard_stop = parser_tensorboard_subparsers.add_parser('stop', help='stop tensorboard') parser_tensorboard_stop = parser_tensorboard_subparsers.add_parser('stop', help='stop tensorboard')
parser_tensorboard_stop.add_argument('id', nargs='?', help='the id of experiment') parser_tensorboard_stop.add_argument('id', nargs='?', help='the id of experiment')
......
...@@ -129,7 +129,7 @@ def parse_ids(args): ...@@ -129,7 +129,7 @@ def parse_ids(args):
return running_experiment_list return running_experiment_list
if args.port is not None: if args.port is not None:
for key in running_experiment_list: for key in running_experiment_list:
if str(experiment_dict[key]['port']) == args.port: if experiment_dict[key]['port'] == args.port:
result_list.append(key) result_list.append(key)
if args.id and result_list and args.id != result_list[0]: if args.id and result_list and args.id != result_list[0]:
print_error('Experiment id and resful server port not match') print_error('Experiment id and resful server port not match')
......
...@@ -9,8 +9,6 @@ from .common_utils import print_error ...@@ -9,8 +9,6 @@ from .common_utils import print_error
def rest_put(url, data, timeout, show_error=False): def rest_put(url, data, timeout, show_error=False):
'''Call rest put method''' '''Call rest put method'''
print('## [nnictl] PUT', url)
print(data)
try: try:
response = requests.put(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\ response = requests.put(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\
data=data, timeout=timeout) data=data, timeout=timeout)
...@@ -22,8 +20,6 @@ def rest_put(url, data, timeout, show_error=False): ...@@ -22,8 +20,6 @@ def rest_put(url, data, timeout, show_error=False):
def rest_post(url, data, timeout, show_error=False): def rest_post(url, data, timeout, show_error=False):
'''Call rest post method''' '''Call rest post method'''
print('## [nnictl] POST', url)
print(data)
try: try:
response = requests.post(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\ response = requests.post(url, headers={'Accept': 'application/json', 'Content-Type': 'application/json'},\
data=data, timeout=timeout) data=data, timeout=timeout)
......
...@@ -134,7 +134,6 @@ def start_tensorboard(args): ...@@ -134,7 +134,6 @@ def start_tensorboard(args):
if experiment_dict[args.id]["status"] == "STOPPED": if experiment_dict[args.id]["status"] == "STOPPED":
print_error("Experiment {} is stopped...".format(args.id)) print_error("Experiment {} is stopped...".format(args.id))
return return
config_file_name = experiment_dict[experiment_id]['fileName']
nni_config = Config(args.id) nni_config = Config(args.id)
if nni_config.get_config('experimentConfig').get('trainingServicePlatform') == 'adl': if nni_config.get_config('experimentConfig').get('trainingServicePlatform') == 'adl':
adl_tensorboard_helper(args) adl_tensorboard_helper(args)
......
# Scheduled pipeline: builds NNI from source and runs the unit, integration,
# NAS and model compression tests in the "linux" job.

trigger: none   # not started by pushes
pr: none        # not started by pull requests
schedules:
- cron: 0 16 * * *   # every day at 16:00
  branches:
    include: [ master ]

jobs:
- job: linux
  pool: NNI CI GPU3
  timeoutInMinutes: 120

  steps:
  # Publish PATH and a timestamped NNI_RELEASE for the following steps.
  - script: |
      echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
      echo "##vso[task.setvariable variable=NNI_RELEASE]999.$(date -u +%Y%m%d%H%M%S)"
      python3 -m pip install -U --upgrade pip setuptools
      python3 -m pip install -U pytest
    displayName: Prepare

  # Build the TypeScript part first, then the wheel, then install that wheel.
  - script: |
      set -e
      python3 setup.py build_ts
      python3 setup.py bdist_wheel -p manylinux1_x86_64
      python3 -m pip install dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl
    displayName: Install NNI

  - script: |
      set -e
      python3 -m pip install -U scikit-learn==0.23.2
      python3 -m pip install -U torchvision==0.4.2
      python3 -m pip install -U torch==1.3.1
      python3 -m pip install -U keras==2.1.6
      python3 -m pip install -U tensorflow==2.3.1 tensorflow-estimator==2.3.0
      python3 -m pip install -U thop
      sudo apt-get install swig -y
      nnictl package install --name=SMAC
      nnictl package install --name=BOHB
      nnictl package install --name=PPOTuner
    displayName: Install extra dependencies

  - script: |
      set -e
      cd examples/tuners/customized_tuner
      python3 setup.py develop --user
      nnictl package install .
    displayName: Install customized tuner

  # Python unit tests followed by the two TypeScript test suites.
  - script: |
      set -e
      (cd test && python3 -m pytest ut)
      export PATH=$PATH:$PWD/toolchain/yarn/bin
      export CI=true
      (cd ts/nni_manager && yarn test)
      (cd ts/nasui && yarn test)
    displayName: Unit test
    continueOnError: true   # later test steps still run if this one fails

  - script: |
      cd test
      python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
    displayName: Integration test
    continueOnError: true

  - script: |
      cd test
      source scripts/nas.sh
    displayName: NAS test
    continueOnError: true

  - script: |
      cd test
      source scripts/model_compression.sh
    displayName: Model compression test
# Scheduled pipeline: builds NNI from source and runs the Python unit tests
# and the local integration tests in the "local_windows" job.

trigger: none   # not started by pushes
pr: none        # not started by pull requests
schedules:
- cron: 0 16 * * *   # every day at 16:00
  branches:
    include: [ master ]

jobs:
- job: local_windows
  pool: NNI CI WINDOWS2
  timeoutInMinutes: 120

  steps:
  - script: |
      python -m pip install -U --upgrade pip setuptools
      python -m pip install -U pytest
    displayName: Install Python tools

  # The release version is fixed to 999.0 here, so the wheel file name is
  # spelled out literally in the install command.
  - script: |
      python -m pip uninstall nni --yes
      set NNI_RELEASE=999.0
      python setup.py build_ts
      python setup.py bdist_wheel -p win_amd64
      python -m pip install dist/nni-999.0-py3-none-win_amd64.whl
    displayName: Install NNI

  - script: |
      python -m pip install -U scikit-learn==0.23.2
      python -m pip install -U keras==2.1.6
      python -m pip install -U torchvision===0.4.1 torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html
      python -m pip install -U tensorflow==2.3.1 tensorflow-estimator==2.3.0
      nnictl package install --name=PPOTuner
    displayName: Install extra dependencies

  - script: |
      cd examples/tuners/customized_tuner
      python setup.py develop --user
      nnictl package install .
    displayName: Install example customized tuner

  # Only the Python unit tests run here; the TypeScript ones are still a TODO.
  - script: |
      cd test
      python -m pytest ut
      echo "TODO: TypeScript UT"
    displayName: Unit test
    continueOnError: true   # the integration test still runs if this step fails

  - script: |
      cd test
      python nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts local
    displayName: Integration test
# Scheduled pipeline: builds NNI, optionally rebuilds the test docker image,
# and runs the integration tests against the PAI training service twice
# (with --pai_reuse false, then with --pai_reuse true).

trigger: none   # not started by pushes
pr: none        # not started by pull requests
schedules:
- cron: 0 16 * * *   # every day at 16:00
  branches:
    include: [ master ]

# variables set on VSO: (mostly for security concern)
#   pai_user
#   pai_token
#   manager_ip
#   docker_hub_password

jobs:
- job: pai
  pool: NNI CI PAI CLI
  timeoutInMinutes: 120

  steps:
  # Publish PATH and a timestamped NNI_RELEASE for the following steps.
  - script: |
      export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S)
      echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
      echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
      echo "Working directory: ${PWD}"
      echo "NNI version: ${NNI_RELEASE}"
      echo "Build docker image: $(build_docker_image)"
      python3 -m pip install -U --upgrade pip setuptools
    displayName: Prepare

  - script: |
      set -e
      python3 setup.py build_ts
      python3 setup.py bdist_wheel -p manylinux1_x86_64
      python3 -m pip install -U dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl
    displayName: Build and install NNI

  - script: |
      set -e
      sudo apt-get install swig -y
      nnictl package install --name=SMAC
      nnictl package install --name=BOHB
    displayName: Install extra tuners

  - script: |
      set -e
      cd examples/tuners/customized_tuner
      python3 setup.py develop --user
      nnictl package install .
    displayName: Install customized tuner

  # Runs only when the build_docker_image variable equals 'true'.
  - script: |
      set -e
      docker login -u nnidev -p $(docker_hub_password)
      echo '## Build docker image ##'
      docker build --build-arg NNI_RELEASE=${NNI_RELEASE} -t nnidev/nni-it-pai:latest .
      echo '## Upload docker image ##'
      docker push nnidev/nni-it-pai:latest
    condition: eq(variables['build_docker_image'], 'true')
    displayName: Build and upload docker image

  # First pass: generate the training service config with reuse mode off.
  - script: |
      set -e
      cd test
      python3 nni_test/nnitest/generate_ts_config.py \
          --ts pai \
          --pai_reuse false \
          --pai_host https://ne.openpai.org \
          --pai_user $(pai_user) \
          --nni_docker_image nnidev/nni-it-pai:latest \
          --pai_storage_config_name confignfs-data \
          --pai_token $(pai_token) \
          --nni_manager_nfs_mount_path /home/quzha/mnt-pai-ne/shinyang3 \
          --container_nfs_mount_path /mnt/confignfs-data/shinyang3 \
          --nni_manager_ip $(manager_ip) \
          --vc nni
      python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
    displayName: Integration test

  # Second pass: identical arguments except --pai_reuse true.
  - script: |
      set -e
      cd test
      python3 nni_test/nnitest/generate_ts_config.py \
          --ts pai \
          --pai_reuse true \
          --pai_host https://ne.openpai.org \
          --pai_user $(pai_user) \
          --nni_docker_image nnidev/nni-it-pai:latest \
          --pai_storage_config_name confignfs-data \
          --pai_token $(pai_token) \
          --nni_manager_nfs_mount_path /home/quzha/mnt-pai-ne/shinyang3 \
          --container_nfs_mount_path /mnt/confignfs-data/shinyang3 \
          --nni_manager_ip $(manager_ip) \
          --vc nni
      python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts pai
    displayName: Integration test (reuse mode)
# Scheduled pipeline: builds NNI, copies the wheel and test scripts to a
# worker over SSH, starts a docker container there, and runs the integration
# tests against the remote training service twice (with --remote_reuse false,
# then with --remote_reuse true) before stopping the container.

trigger: none   # not started by pushes
pr: none        # not started by pull requests
schedules:
- cron: 0 16 * * *   # every day at 16:00
  branches:
    include: [ master ]

variables:
  worker: remote_nni-ci-gpu-03   # used as the sshEndpoint of the SSH tasks below

# variables set on VSO: (for security concern)
#   manager_ip
#   worker_ip
#   password_in_docker

jobs:
- job: remote_linux2linux
  pool: NNI CI REMOTE CLI
  timeoutInMinutes: 120

  steps:
  # Publish PATH and a timestamped NNI_RELEASE for the following steps.
  - script: |
      export NNI_RELEASE=999.$(date -u +%Y%m%d%H%M%S)
      echo "##vso[task.setvariable variable=PATH]${PATH}:${HOME}/.local/bin"
      echo "##vso[task.setvariable variable=NNI_RELEASE]${NNI_RELEASE}"
      echo "Working directory: ${PWD}"
      echo "NNI version: ${NNI_RELEASE}"
      python3 -m pip install -U --upgrade pip setuptools
    displayName: Prepare

  - script: |
      set -e
      python3 setup.py build_ts
      python3 setup.py bdist_wheel -p manylinux1_x86_64
      python3 -m pip install dist/nni-${NNI_RELEASE}-py3-none-manylinux1_x86_64.whl
    displayName: Install NNI

  - script: |
      set -e
      sudo apt-get install swig -y
      nnictl package install --name=SMAC
      nnictl package install --name=BOHB
    displayName: Install extra tuners

  - script: |
      set -e
      cd examples/tuners/customized_tuner
      python3 setup.py develop --user
      nnictl package install .
    displayName: Install customized tuner

  # Files land under a per-build directory on the worker.
  - task: CopyFilesOverSSH@0
    inputs:
      sshEndpoint: $(worker)
      sourceFolder: dist
      targetFolder: /tmp/nnitest/$(Build.BuildId)/dist
      overwrite: true
    displayName: Copy wheel to remote machine
    timeoutInMinutes: 10

  - task: CopyFilesOverSSH@0
    inputs:
      sshEndpoint: $(worker)
      sourceFolder: test
      targetFolder: /tmp/nnitest/$(Build.BuildId)/test
      overwrite: true
    displayName: Copy test scripts to remote machine
    timeoutInMinutes: 10

  # Starts the container, then publishes the contents of the per-build "port"
  # file as the docker_port variable used by the test steps.
  - task: SSH@0
    inputs:
      sshEndpoint: $(worker)
      runOptions: commands
      commands: |
        python3 /tmp/nnitest/$(Build.BuildId)/test/nni_test/nnitest/remote_docker.py --mode start --name $(Build.BuildId) --image nni/nni
        echo "##vso[task.setvariable variable=docker_port]$(cat /tmp/nnitest/$(Build.BuildId)/port)"
    displayName: Start docker

  # First pass: generate the training service config with reuse mode off.
  - script: |
      cd test
      python3 nni_test/nnitest/generate_ts_config.py \
          --ts remote \
          --remote_reuse false \
          --remote_user nni \
          --remote_host $(worker_ip) \
          --remote_port $(docker_port) \
          --remote_pwd $(password_in_docker) \
          --nni_manager_ip $(manager_ip)
      python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote
    displayName: Integration test

  # Second pass: identical arguments except --remote_reuse true.
  - script: |
      cd test
      python3 nni_test/nnitest/generate_ts_config.py \
          --ts remote \
          --remote_reuse true \
          --remote_user nni \
          --remote_host $(worker_ip) \
          --remote_port $(docker_port) \
          --remote_pwd $(password_in_docker) \
          --nni_manager_ip $(manager_ip)
      python3 nni_test/nnitest/run_tests.py --config config/integration_tests.yml --ts remote
    displayName: Integration test (reuse mode)

  - task: SSH@0
    inputs:
      sshEndpoint: $(worker)
      runOptions: commands
      commands: python3 /tmp/nnitest/$(Build.BuildId)/test/nni_test/nnitest/remote_docker.py --mode stop --name $(Build.BuildId)
    displayName: Stop docker
...@@ -47,4 +47,4 @@ ignore-patterns=test* ...@@ -47,4 +47,4 @@ ignore-patterns=test*
# List of members which are set dynamically and missed by pylint inference # List of members which are set dynamically and missed by pylint inference
generated-members=numpy.*,torch.*,tensorflow.* generated-members=numpy.*,torch.*,tensorflow.*
ignored-modules=tensorflow,_win32,msvcrt ignored-modules=tensorflow,_winapi,msvcrt
...@@ -61,7 +61,6 @@ dependencies = [ ...@@ -61,7 +61,6 @@ dependencies = [
'hyperopt==0.1.2', 'hyperopt==0.1.2',
'json_tricks', 'json_tricks',
'netifaces', 'netifaces',
'numpy',
'psutil', 'psutil',
'ruamel.yaml', 'ruamel.yaml',
'requests', 'requests',
...@@ -74,10 +73,13 @@ dependencies = [ ...@@ -74,10 +73,13 @@ dependencies = [
'pkginfo', 'pkginfo',
'websockets', 'websockets',
'filelock', 'filelock',
'prettytable' 'prettytable',
'dataclasses ; python_version < "3.7"',
'numpy < 1.19.4 ; sys_platform == "win32"',
'numpy < 1.20 ; sys_platform != "win32" and python_version < "3.7"',
'numpy ; sys_platform != "win32" and python_version >= "3.7"'
] ]
release = os.environ.get('NNI_RELEASE') release = os.environ.get('NNI_RELEASE')
def _setup(): def _setup():
...@@ -102,7 +104,7 @@ def _setup(): ...@@ -102,7 +104,7 @@ def _setup():
packages = _find_python_packages(), packages = _find_python_packages(),
package_data = { package_data = {
'nni': ['**/requirements.txt'], 'nni': _find_requirements_txt(), # must do this manually due to setuptools issue #1806
'nni_node': _find_node_files() # note: this does not work before building 'nni_node': _find_node_files() # note: this does not work before building
}, },
...@@ -128,19 +130,26 @@ def _setup(): ...@@ -128,19 +130,26 @@ def _setup():
def _find_python_packages(): def _find_python_packages():
packages = [] packages = []
for dirpath, dirnames, filenames in os.walk('nni'): for dirpath, dirnames, filenames in os.walk('nni'):
if '/__pycache__' not in dirpath: if '/__pycache__' not in dirpath and '/.mypy_cache' not in dirpath:
packages.append(dirpath.replace('/', '.')) packages.append(dirpath.replace('/', '.'))
return sorted(packages) + ['nni_node'] return sorted(packages) + ['nni_node']
def _find_requirements_txt():
    """Collect every ``requirements.txt`` found under the ``nni`` directory.

    The result is used as the ``package_data`` entry for the ``nni`` package,
    so each path is expressed relative to ``nni`` rather than to the current
    working directory.

    Returns:
        A sorted list of relative paths. Sorting keeps the result independent
        of the directory traversal order, matching the other ``_find_*``
        helpers in this file, which also return sorted lists.
    """
    requirement_files = []
    for dirpath, _dirnames, filenames in os.walk('nni'):
        if 'requirements.txt' in filenames:
            # Drop the leading 'nni' plus one separator character; for the
            # top-level directory itself this leaves an empty prefix.
            relative_dir = dirpath[len('nni/'):]
            requirement_files.append(os.path.join(relative_dir, 'requirements.txt'))
    return sorted(requirement_files)
def _find_node_files(): def _find_node_files():
if not os.path.exists('nni_node'): if not os.path.exists('nni_node'):
if release and 'built_ts' not in sys.argv: if release and 'build_ts' not in sys.argv:
sys.exit('ERROR: To build a release version, run "python setup.py built_ts" first') sys.exit('ERROR: To build a release version, run "python setup.py build_ts" first')
return [] return []
files = [] files = []
for dirpath, dirnames, filenames in os.walk('nni_node'): for dirpath, dirnames, filenames in os.walk('nni_node'):
for filename in filenames: for filename in filenames:
files.append((dirpath + '/' + filename)[len('nni_node/'):]) files.append(os.path.join(dirpath[len('nni_node/'):], filename))
if '__init__.py' in files: if '__init__.py' in files:
files.remove('__init__.py') files.remove('__init__.py')
return sorted(files) return sorted(files)
...@@ -165,21 +174,24 @@ class BuildTs(Command): ...@@ -165,21 +174,24 @@ class BuildTs(Command):
class Build(build): class Build(build):
def run(self): def run(self):
assert release, 'Please set environment variable "NNI_RELEASE=<release_version>"' if not release:
assert os.path.isfile('nni_node/main.js'), 'Please run "build_ts" before "build"' sys.exit('Please set environment variable "NNI_RELEASE=<release_version>"')
assert not os.path.islink('nni_node/main.js'), 'This is a development build' if os.path.islink('nni_node/main.js'):
sys.exit('A development build already exists. Please uninstall NNI and run "python3 setup.py clean --all".')
super().run() super().run()
class Develop(develop): class Develop(develop):
user_options = develop.user_options + [ user_options = develop.user_options + [
('no-user', None, 'Prevent automatically adding "--user"') ('no-user', None, 'Prevent automatically adding "--user"'),
('skip-ts', None, 'Prevent building TypeScript modules')
] ]
boolean_options = develop.boolean_options + ['no-user'] boolean_options = develop.boolean_options + ['no-user', 'skip-ts']
def initialize_options(self): def initialize_options(self):
super().initialize_options() super().initialize_options()
self.no_user = None self.no_user = None
self.skip_ts = None
def finalize_options(self): def finalize_options(self):
# if `--user` or `--no-user` is explicitly set, do nothing # if `--user` or `--no-user` is explicitly set, do nothing
...@@ -189,7 +201,8 @@ class Develop(develop): ...@@ -189,7 +201,8 @@ class Develop(develop):
super().finalize_options() super().finalize_options()
def run(self): def run(self):
setup_ts.build(release=None) if not self.skip_ts:
setup_ts.build(release=None)
super().run() super().run()
class Clean(clean): class Clean(clean):
...@@ -224,4 +237,5 @@ _temp_files = [ ...@@ -224,4 +237,5 @@ _temp_files = [
] ]
_setup() if __name__ == '__main__':
_setup()
# Experiment configuration "default_test": Random tuner with the Medianstop
# assessor, running the mnist-tfv2 example trial on the local platform.
authorName: nni
experimentName: default_test
maxExecDuration: 5m
maxTrialNum: 4
trialConcurrency: 2
searchSpacePath: ./mnist_search_space.json

tuner:
  builtinTunerName: Random
assessor:
  builtinAssessorName: Medianstop
  classArgs:
    optimize_mode: maximize
trial:
  codeDir: ../../../examples/trials/mnist-tfv2
  command: python3 mnist.py

useAnnotation: false
multiPhase: false
multiThread: false

trainingServicePlatform: local
...@@ -37,17 +37,12 @@ testCases: ...@@ -37,17 +37,12 @@ testCases:
- name: sklearn-regression - name: sklearn-regression
configFile: test/config/examples/sklearn-regression.yml configFile: test/config/examples/sklearn-regression.yml
- name: mnist-tfv1 - name: mnist-tensorflow
configFile: test/config/examples/mnist-tfv1.yml configFile: test/config/examples/mnist-tfv2.yml
config: config:
maxTrialNum: 1 maxTrialNum: 1
trialConcurrency: 1 trialConcurrency: 1
trainingService: local remote # FIXME: timeout on pai, looks like tensorflow failed to link CUDA
- name: mnist-keras
configFile: test/config/examples/mnist-keras.yml
config:
maxTrialNum: 2
trialConcurrency: 1
- name: mnist-pytorch-local - name: mnist-pytorch-local
configFile: test/config/examples/mnist-pytorch.yml configFile: test/config/examples/mnist-pytorch.yml
...@@ -61,11 +56,12 @@ testCases: ...@@ -61,11 +56,12 @@ testCases:
launchCommand: nnictl create --config $configFile --debug launchCommand: nnictl create --config $configFile --debug
trainingService: remote pai kubeflow frameworkcontroller dlts trainingService: remote pai kubeflow frameworkcontroller dlts
- name: mnist-annotation # TODO: move this and following commented test cases to pytorch or tf2
configFile: test/config/examples/mnist-annotation.yml #- name: mnist-annotation
config: # configFile: test/config/examples/mnist-annotation.yml
maxTrialNum: 1 # config:
trialConcurrency: 1 # maxTrialNum: 1
# trialConcurrency: 1
- name: cifar10-pytorch - name: cifar10-pytorch
configFile: test/config/examples/cifar10-pytorch.yml configFile: test/config/examples/cifar10-pytorch.yml
...@@ -79,8 +75,8 @@ testCases: ...@@ -79,8 +75,8 @@ testCases:
command: python3 main.py --epochs 1 --batches 1 command: python3 main.py --epochs 1 --batches 1
gpuNum: 0 gpuNum: 0
- name: nested-ss #- name: nested-ss
configFile: test/config/examples/mnist-nested-search-space.yml # configFile: test/config/examples/mnist-nested-search-space.yml
- name: classic-nas-gen-ss - name: classic-nas-gen-ss
configFile: test/config/examples/classic-nas-pytorch.yml configFile: test/config/examples/classic-nas-pytorch.yml
...@@ -147,8 +143,8 @@ testCases: ...@@ -147,8 +143,8 @@ testCases:
config: config:
maxTrialNum: 4 maxTrialNum: 4
trialConcurrency: 4 trialConcurrency: 4
launchCommand: python3 -c 'from nni.experiment import Experiment; exp = Experiment(); exp.start_experiment("$configFile")' launchCommand: python3 -c 'from nni.experiment import ExternalExperiment as Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'from nni.experiment import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()' stopCommand: python3 -c 'from nni.experiment import ExternalExperiment as Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator: validator:
class: NnicliValidator class: NnicliValidator
platform: linux darwin platform: linux darwin
...@@ -215,40 +211,40 @@ testCases: ...@@ -215,40 +211,40 @@ testCases:
######################################################################### #########################################################################
# nni tuners test # nni tuners test
######################################################################### #########################################################################
- name: tuner-annel #- name: tuner-annel
configFile: test/config/tuners/anneal.yml # configFile: test/config/tuners/anneal.yml
- name: tuner-evolution #- name: tuner-evolution
configFile: test/config/tuners/evolution.yml # configFile: test/config/tuners/evolution.yml
- name: tuner-random #- name: tuner-random
configFile: test/config/tuners/random.yml # configFile: test/config/tuners/random.yml
- name: tuner-smac #- name: tuner-smac
configFile: test/config/tuners/smac.yml # configFile: test/config/tuners/smac.yml
platform: linux darwin # platform: linux darwin
- name: tuner-tpe #- name: tuner-tpe
configFile: test/config/tuners/tpe.yml # configFile: test/config/tuners/tpe.yml
- name: tuner-batch #- name: tuner-batch
configFile: test/config/tuners/batch.yml # configFile: test/config/tuners/batch.yml
- name: tuner-bohb #- name: tuner-bohb
configFile: test/config/tuners/bohb.yml # configFile: test/config/tuners/bohb.yml
platform: linux darwin # platform: linux darwin
- name: tuner-gp #- name: tuner-gp
configFile: test/config/tuners/gp.yml # configFile: test/config/tuners/gp.yml
- name: tuner-grid #- name: tuner-grid
configFile: test/config/tuners/gridsearch.yml # configFile: test/config/tuners/gridsearch.yml
- name: tuner-hyperband #- name: tuner-hyperband
configFile: test/config/tuners/hyperband.yml # configFile: test/config/tuners/hyperband.yml
- name: tuner-metis #- name: tuner-metis
configFile: test/config/tuners/metis.yml # configFile: test/config/tuners/metis.yml
- name: tuner-regularized_evolution - name: tuner-regularized_evolution
configFile: test/config/tuners/regularized_evolution_tuner.yml configFile: test/config/tuners/regularized_evolution_tuner.yml
......
...@@ -110,8 +110,8 @@ testCases: ...@@ -110,8 +110,8 @@ testCases:
config: config:
maxTrialNum: 4 maxTrialNum: 4
trialConcurrency: 4 trialConcurrency: 4
launchCommand: python3 -c 'from nni.experiment import Experiment; exp = Experiment(); exp.start_experiment("$configFile")' launchCommand: python3 -c 'from nni.experiment import ExternalExperiment as Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'from nni.experiment import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()' stopCommand: python3 -c 'from nni.experiment import ExternalExperiment as Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator: validator:
class: NnicliValidator class: NnicliValidator
platform: linux darwin platform: linux darwin
......
...@@ -47,8 +47,8 @@ testCases: ...@@ -47,8 +47,8 @@ testCases:
config: config:
maxTrialNum: 4 maxTrialNum: 4
trialConcurrency: 4 trialConcurrency: 4
launchCommand: python3 -c 'from nni.experiment import Experiment; exp = Experiment(); exp.start_experiment("$configFile")' launchCommand: python3 -c 'from nni.experiment import ExternalExperiment as Experiment; exp = Experiment(); exp.start_experiment("$configFile")'
stopCommand: python3 -c 'from nni.experiment import Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()' stopCommand: python3 -c 'from nni.experiment import ExternalExperiment as Experiment; exp = Experiment(); exp.connect_experiment("http://localhost:8080/"); exp.stop_experiment()'
validator: validator:
class: NnicliValidator class: NnicliValidator
platform: linux darwin platform: linux darwin
......
...@@ -6,7 +6,7 @@ from os import remove ...@@ -6,7 +6,7 @@ from os import remove
import subprocess import subprocess
import json import json
import requests import requests
from nni.experiment import Experiment from nni.experiment import ExternalExperiment as Experiment
from nni.tools.nnictl.updater import load_search_space from nni.tools.nnictl.updater import load_search_space
from utils import METRICS_URL, GET_IMPORTED_DATA_URL from utils import METRICS_URL, GET_IMPORTED_DATA_URL
......
...@@ -13,8 +13,8 @@ from torchvision.models.resnet import resnet18 ...@@ -13,8 +13,8 @@ from torchvision.models.resnet import resnet18
import unittest import unittest
from unittest import TestCase, main from unittest import TestCase, main
from nni.compression.pytorch import ModelSpeedup from nni.compression.pytorch import ModelSpeedup, apply_compression_results
from nni.algorithms.compression.pytorch.pruning import L1FilterPruner, apply_compression_results from nni.algorithms.compression.pytorch.pruning import L1FilterPruner
from nni.algorithms.compression.pytorch.pruning.weight_masker import WeightMasker from nni.algorithms.compression.pytorch.pruning.weight_masker import WeightMasker
from nni.algorithms.compression.pytorch.pruning.one_shot import _StructuredFilterPruner from nni.algorithms.compression.pytorch.pruning.one_shot import _StructuredFilterPruner
...@@ -30,13 +30,17 @@ RELATIVE_THRESHOLD = 0.01 ...@@ -30,13 +30,17 @@ RELATIVE_THRESHOLD = 0.01
# an absolute threshold to determine whether the final result is correct. # an absolute threshold to determine whether the final result is correct.
# The error should meet the RELATIVE_THRESHOLD or the ABSOLUTE_THRESHOLD. # The error should meet the RELATIVE_THRESHOLD or the ABSOLUTE_THRESHOLD.
ABSOLUTE_THRESHOLD = 0.0001 ABSOLUTE_THRESHOLD = 0.0001
class BackboneModel1(nn.Module):
    """Minimal backbone made of a single 1x1 convolution (1 channel in, 1 out)."""

    def __init__(self):
        super().__init__()
        # Same layer as ``nn.Conv2d(1, 1, 1, 1)``, spelled with keywords.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=1,
                               kernel_size=1, stride=1)

    def forward(self, x):
        """Return ``conv1`` applied to ``x``."""
        out = self.conv1(x)
        return out
class BackboneModel2(torch.nn.Module): class BackboneModel2(torch.nn.Module):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
...@@ -53,32 +57,58 @@ class BackboneModel2(torch.nn.Module): ...@@ -53,32 +57,58 @@ class BackboneModel2(torch.nn.Module):
x = F.relu(self.bn2(self.conv2(x))) x = F.relu(self.bn2(self.conv2(x)))
x = F.max_pool2d(x, 2, 2) x = F.max_pool2d(x, 2, 2)
x = x.view(x.size(0), -1) x = x.view(x.size(0), -1)
x = F.relu(self.fc1(x)) x = F.relu(self.fc1(x))
x = self.fc2(x) x = self.fc2(x)
return x return x
class BigModel(torch.nn.Module):
    """Two backbones in sequence followed by a small fully connected head."""

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in the same order as before so that
        # parameter initialisation and state-dict keys are unchanged.
        self.backbone1 = BackboneModel1()
        self.backbone2 = BackboneModel2()
        head_layers = [
            nn.Linear(10, 10),
            nn.BatchNorm1d(10),
            nn.ReLU(inplace=True),
            nn.Linear(10, 2),
        ]
        self.fc3 = nn.Sequential(*head_layers)

    def forward(self, x):
        """Feed ``x`` through ``backbone1``, ``backbone2`` and the ``fc3`` head."""
        return self.fc3(self.backbone2(self.backbone1(x)))
class TransposeModel(torch.nn.Module):
    """Small CNN that follows a ``Conv2d`` with a grouped ``ConvTranspose2d``.

    ``fc1`` expects ``8 * 8 * 50`` features. With the unpadded 5x5 kernels
    used here, an 8x8 input shrinks to 4x4 after ``conv1`` and grows back to
    8x8 after ``conv2``, so an 8x8 spatial input matches that size.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 20, 5)
        self.conv2 = nn.ConvTranspose2d(20, 50, 5, groups=2)
        # Each batch norm is sized from the convolution it normalises.
        self.bn1 = nn.BatchNorm2d(self.conv1.out_channels)
        self.bn2 = nn.BatchNorm2d(self.conv2.out_channels)
        self.fc1 = nn.Linear(8 * 8 * 50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Run conv -> transposed conv -> two linear layers; returns 10 logits per sample."""
        # No pooling is applied between the stages.
        conv_out = F.relu(self.bn1(self.conv1(x)))
        deconv_out = F.relu(self.bn2(self.conv2(conv_out)))
        flat = deconv_out.view(deconv_out.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
dummy_input = torch.randn(2, 1, 28, 28) dummy_input = torch.randn(2, 1, 28, 28)
SPARSITY = 0.5 SPARSITY = 0.5
MODEL_FILE, MASK_FILE = './11_model.pth', './l1_mask.pth' MODEL_FILE, MASK_FILE = './11_model.pth', './l1_mask.pth'
def prune_model_l1(model): def prune_model_l1(model):
config_list = [{ config_list = [{
'sparsity': SPARSITY, 'sparsity': SPARSITY,
...@@ -88,6 +118,7 @@ def prune_model_l1(model): ...@@ -88,6 +118,7 @@ def prune_model_l1(model):
pruner.compress() pruner.compress()
pruner.export_model(model_path=MODEL_FILE, mask_path=MASK_FILE) pruner.export_model(model_path=MODEL_FILE, mask_path=MASK_FILE)
def generate_random_sparsity(model): def generate_random_sparsity(model):
cfg_list = [] cfg_list = []
for name, module in model.named_modules(): for name, module in model.named_modules():
...@@ -97,18 +128,20 @@ def generate_random_sparsity(model): ...@@ -97,18 +128,20 @@ def generate_random_sparsity(model):
'sparsity': sparsity}) 'sparsity': sparsity})
return cfg_list return cfg_list
def zero_bn_bias(model):
    """Zero the bias and the running mean of every batch-norm layer in ``model``.

    Only ``nn.BatchNorm1d``, ``nn.BatchNorm2d`` and ``nn.BatchNorm3d`` modules
    (including ``model`` itself) are touched; other modules are left alone.

    Layers created with ``affine=False`` have no bias, and layers created with
    ``track_running_stats=False`` have no running mean; the missing tensor is
    skipped instead of raising ``AttributeError``. The replacement tensors keep
    the dtype and device of the tensors they replace.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose batch-norm layers are modified in place.
    """
    bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
    with torch.no_grad():
        for module in model.modules():
            if not isinstance(module, bn_types):
                continue
            if module.bias is not None:
                module.bias.data = torch.zeros_like(module.bias.data)
            if module.running_mean is not None:
                # Assigning a tensor to an existing buffer name keeps it
                # registered as a buffer.
                module.running_mean = torch.zeros_like(module.running_mean)
class L1ChannelMasker(WeightMasker): class L1ChannelMasker(WeightMasker):
def __init__(self, model, pruner): def __init__(self, model, pruner):
self.model = model self.model = model
...@@ -143,21 +176,27 @@ class L1ChannelMasker(WeightMasker): ...@@ -143,21 +176,27 @@ class L1ChannelMasker(WeightMasker):
w_abs = weight.abs() w_abs = weight.abs()
if wrapper.type == 'Conv2d': if wrapper.type == 'Conv2d':
w_abs_structured = w_abs.sum((0, 2, 3)) w_abs_structured = w_abs.sum((0, 2, 3))
threshold = torch.topk(w_abs_structured, num_prune, largest=False)[0].max() threshold = torch.topk(
mask_weight = torch.gt(w_abs_structured, threshold)[None, :, None, None].expand_as(weight).type_as(weight) w_abs_structured, num_prune, largest=False)[0].max()
mask_weight = torch.gt(w_abs_structured, threshold)[
None, :, None, None].expand_as(weight).type_as(weight)
return {'weight_mask': mask_weight.detach()} return {'weight_mask': mask_weight.detach()}
else: else:
# Linear # Linear
assert wrapper.type == 'Linear' assert wrapper.type == 'Linear'
w_abs_structured = w_abs.sum((0)) w_abs_structured = w_abs.sum((0))
threshold = torch.topk(w_abs_structured, num_prune, largest=False)[0].max() threshold = torch.topk(
mask_weight = torch.gt(w_abs_structured, threshold)[None, :].expand_as(weight).type_as(weight) w_abs_structured, num_prune, largest=False)[0].max()
mask_weight = torch.gt(w_abs_structured, threshold)[
None, :].expand_as(weight).type_as(weight)
return {'weight_mask': mask_weight.detach(), 'bias_mask': mask_bias} return {'weight_mask': mask_weight.detach(), 'bias_mask': mask_bias}
class L1ChannelPruner(_StructuredFilterPruner): class L1ChannelPruner(_StructuredFilterPruner):
def __init__(self, model, config_list, optimizer=None, dependency_aware=False, dummy_input=None): def __init__(self, model, config_list, optimizer=None, dependency_aware=False, dummy_input=None):
super().__init__(model, config_list, pruning_algorithm='l1', optimizer=optimizer, super().__init__(model, config_list, pruning_algorithm='l1', optimizer=optimizer,
dependency_aware=dependency_aware, dummy_input=dummy_input) dependency_aware=dependency_aware, dummy_input=dummy_input)
def validate_config(self, model, config_list): def validate_config(self, model, config_list):
pass pass
...@@ -177,6 +216,7 @@ def channel_prune(model): ...@@ -177,6 +216,7 @@ def channel_prune(model):
pruner.compress() pruner.compress()
pruner.export_model(model_path=MODEL_FILE, mask_path=MASK_FILE) pruner.export_model(model_path=MODEL_FILE, mask_path=MASK_FILE)
class SpeedupTestCase(TestCase): class SpeedupTestCase(TestCase):
def test_speedup_vgg16(self): def test_speedup_vgg16(self):
prune_model_l1(vgg16()) prune_model_l1(vgg16())
...@@ -187,8 +227,10 @@ class SpeedupTestCase(TestCase): ...@@ -187,8 +227,10 @@ class SpeedupTestCase(TestCase):
orig_model = vgg16() orig_model = vgg16()
assert model.training assert model.training
assert model.features[2].out_channels == int(orig_model.features[2].out_channels * SPARSITY) assert model.features[2].out_channels == int(
assert model.classifier[0].in_features == int(orig_model.classifier[0].in_features * SPARSITY) orig_model.features[2].out_channels * SPARSITY)
assert model.classifier[0].in_features == int(
orig_model.classifier[0].in_features * SPARSITY)
def test_speedup_bigmodel(self): def test_speedup_bigmodel(self):
prune_model_l1(BigModel()) prune_model_l1(BigModel())
...@@ -205,23 +247,55 @@ class SpeedupTestCase(TestCase): ...@@ -205,23 +247,55 @@ class SpeedupTestCase(TestCase):
model.eval() model.eval()
speedup_out = model(dummy_input) speedup_out = model(dummy_input)
if not torch.allclose(mask_out, speedup_out, atol=1e-07): if not torch.allclose(mask_out, speedup_out, atol=1e-07):
print('input:', dummy_input.size(), torch.abs(dummy_input).sum((2,3))) print('input:', dummy_input.size(),
torch.abs(dummy_input).sum((2, 3)))
print('mask_out:', mask_out) print('mask_out:', mask_out)
print('speedup_out:', speedup_out) print('speedup_out:', speedup_out)
raise RuntimeError('model speedup inference result is incorrect!') raise RuntimeError('model speedup inference result is incorrect!')
orig_model = BigModel() orig_model = BigModel()
assert model.backbone2.conv1.out_channels == int(orig_model.backbone2.conv1.out_channels * SPARSITY) assert model.backbone2.conv1.out_channels == int(
assert model.backbone2.conv2.in_channels == int(orig_model.backbone2.conv2.in_channels * SPARSITY) orig_model.backbone2.conv1.out_channels * SPARSITY)
assert model.backbone2.conv2.out_channels == int(orig_model.backbone2.conv2.out_channels * SPARSITY) assert model.backbone2.conv2.in_channels == int(
assert model.backbone2.fc1.in_features == int(orig_model.backbone2.fc1.in_features * SPARSITY) orig_model.backbone2.conv2.in_channels * SPARSITY)
assert model.backbone2.conv2.out_channels == int(
orig_model.backbone2.conv2.out_channels * SPARSITY)
assert model.backbone2.fc1.in_features == int(
orig_model.backbone2.fc1.in_features * SPARSITY)
def test_convtranspose_model(self):
    """Check that speeding up a pruned ``TransposeModel`` preserves its output.

    The model is pruned with ``L1FilterPruner`` (Conv2d only, sparsity 0.5),
    exported, reloaded into a fresh model and compacted with ``ModelSpeedup``.
    The summed outputs of both models must agree within
    ``RELATIVE_THRESHOLD`` or ``ABSOLUTE_THRESHOLD``.
    """
    # The model is built before the input so the RNG is consumed in the
    # same order as before.
    masked_model = TransposeModel()
    sample = torch.rand(1, 3, 8, 8)
    prune_cfg = [{'sparsity': 0.5, 'op_types': ['Conv2d']}]

    l1_pruner = L1FilterPruner(masked_model, prune_cfg)
    l1_pruner.compress()
    # One forward pass on the wrapped model before exporting.
    # NOTE(review): presumably needed so the masks are applied -- confirm.
    masked_model(sample)
    l1_pruner.export_model(MODEL_FILE, MASK_FILE)
    l1_pruner._unwrap_model()

    # Rebuild the network from the exported weights and compact it.
    compact_model = TransposeModel()
    exported_weights = torch.load(MODEL_FILE)
    compact_model.load_state_dict(exported_weights)
    speedup = ModelSpeedup(compact_model, sample, MASK_FILE)
    speedup.speedup_model()

    for net in (masked_model, compact_model):
        zero_bn_bias(net)

    ori_sum = torch.sum(masked_model(sample))
    speeded_sum = torch.sum(compact_model(sample))
    print('Tanspose Speedup Test: ori_sum={} speedup_sum={}'.format(ori_sum, speeded_sum))

    abs_err = abs(ori_sum - speeded_sum)
    rel_err = abs_err / abs(ori_sum)
    assert rel_err < RELATIVE_THRESHOLD or abs_err < ABSOLUTE_THRESHOLD
# FIXME: This test case might fail randomly, no idea why # FIXME: This test case might fail randomly, no idea why
# Example: https://msrasrg.visualstudio.com/NNIOpenSource/_build/results?buildId=16282 # Example: https://msrasrg.visualstudio.com/NNIOpenSource/_build/results?buildId=16282
def test_speedup_integration(self): def test_speedup_integration(self):
for model_name in ['resnet18', 'squeezenet1_1', 'mobilenet_v2', 'densenet121', 'densenet169', 'inception_v3', 'resnet50']: for model_name in ['resnet18', 'squeezenet1_1',
'mobilenet_v2', 'densenet121',
# 'inception_v3' inception is too large and may fail the pipeline
'densenet169', 'resnet50']:
kwargs = { kwargs = {
'pretrained': True 'pretrained': True
} }
...@@ -235,7 +309,7 @@ class SpeedupTestCase(TestCase): ...@@ -235,7 +309,7 @@ class SpeedupTestCase(TestCase):
Model = getattr(models, model_name) Model = getattr(models, model_name)
net = Model(**kwargs).to(device) net = Model(**kwargs).to(device)
speedup_model = Model(**kwargs).to(device) speedup_model = Model(**kwargs).to(device)
net.eval() # this line is necessary net.eval() # this line is necessary
speedup_model.eval() speedup_model.eval()
# random generate the prune config for the pruner # random generate the prune config for the pruner
cfgs = generate_random_sparsity(net) cfgs = generate_random_sparsity(net)
...@@ -258,8 +332,10 @@ class SpeedupTestCase(TestCase): ...@@ -258,8 +332,10 @@ class SpeedupTestCase(TestCase):
speeded_out = speedup_model(data) speeded_out = speedup_model(data)
ori_sum = torch.sum(ori_out).item() ori_sum = torch.sum(ori_out).item()
speeded_sum = torch.sum(speeded_out).item() speeded_sum = torch.sum(speeded_out).item()
print('Sum of the output of %s (before speedup):'%model_name, ori_sum) print('Sum of the output of %s (before speedup):' %
print('Sum of the output of %s (after speedup):'%model_name, speeded_sum) model_name, ori_sum)
print('Sum of the output of %s (after speedup):' %
model_name, speeded_sum)
assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \ assert (abs(ori_sum - speeded_sum) / abs(ori_sum) < RELATIVE_THRESHOLD) or \
(abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD) (abs(ori_sum - speeded_sum) < ABSOLUTE_THRESHOLD)
...@@ -296,5 +372,6 @@ class SpeedupTestCase(TestCase): ...@@ -296,5 +372,6 @@ class SpeedupTestCase(TestCase):
os.remove(MODEL_FILE) os.remove(MODEL_FILE)
os.remove(MASK_FILE) os.remove(MASK_FILE)
if __name__ == '__main__': if __name__ == '__main__':
main() main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment