OpenDAS / nni · Commits · 1011377c

Commit 1011377c, authored Mar 31, 2022 by qianyj

    the source code of NNI for DCU

parent abc22158
Changes: 788. Showing 20 changed files with 1603 additions and 0 deletions (+1603 −0).
examples/nas/legacy/cream/__init__.py                                     +0   −0
examples/nas/legacy/cream/configs/retrain/114.yaml                        +51  −0
examples/nas/legacy/cream/configs/retrain/14.yaml                         +51  −0
examples/nas/legacy/cream/configs/retrain/23.yaml                         +51  −0
examples/nas/legacy/cream/configs/retrain/287.yaml                        +51  −0
examples/nas/legacy/cream/configs/retrain/43.yaml                         +51  −0
examples/nas/legacy/cream/configs/retrain/481.yaml                        +51  −0
examples/nas/legacy/cream/configs/retrain/604.yaml                        +51  −0
examples/nas/legacy/cream/configs/retrain/72.yaml                         +51  −0
examples/nas/legacy/cream/configs/test.yaml                               +37  −0
examples/nas/legacy/cream/configs/train.yaml                              +53  −0
examples/nas/legacy/cream/lib/config.py                                   +123 −0
examples/nas/legacy/cream/lib/core/retrain.py                             +135 −0
examples/nas/legacy/cream/lib/core/test.py                                +87  −0
examples/nas/legacy/cream/lib/models/blocks/__init__.py                   +2   −0
examples/nas/legacy/cream/lib/models/blocks/inverted_residual_block.py    +113 −0
examples/nas/legacy/cream/lib/models/blocks/residual_block.py             +105 −0
examples/nas/legacy/cream/lib/models/builders/build_childnet.py           +181 −0
examples/nas/legacy/cream/lib/models/builders/build_supernet.py           +214 −0
examples/nas/legacy/cream/lib/models/structures/childnet.py               +145 −0
Too many changes to show. To preserve performance only 788 of 788+ files are displayed.
CHANGELOG → examples/nas/legacy/cream/__init__.py    100644 → 100755    (file moved)
examples/nas/legacy/cream/configs/retrain/114.yaml    0 → 100755

AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: '112m_retrain'
RESUME_PATH: './experiments/workspace/retrain/resume.pth.tar'
SAVE_PATH: './experiments/workspace/retrain'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0

DATASET:
  NUM_CLASSES: 1000
  IMAGE_SIZE: 224  # image patch size
  INTERPOLATION: 'random'  # Image resize interpolation type
  BATCH_SIZE: 128  # batch size
  NO_PREFECHTER: False

NET:
  GP: 'avg'
  DROPOUT_RATE: 0.2
  SELECTION: 470

  EMA:
    USE: True
    FORCE_CPU: False  # force model ema to be tracked on CPU
    DECAY: 0.9999

LR: 0.064
EPOCHS: 500
OPT_EPS: 1e-3
SCHED: 'cosine'
OPT: 'rmsproptf'
WARMUP_LR: 1e-6
DECAY_EPOCHS: 2.4
DECAY_RATE: 0.973
WARMUP_EPOCHS: 3
WEIGHT_DECAY: 1e-5

AUGMENTATION:
  AA: 'rand-m9-mstd0.5'
  RE_PROB: 0.2  # random erase prob
  RE_MODE: 'pixel'  # random erase mode
examples/nas/legacy/cream/configs/retrain/14.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '14m_retrain' (SELECTION stays 470).
examples/nas/legacy/cream/configs/retrain/23.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '23m_retrain' (SELECTION stays 470).
examples/nas/legacy/cream/configs/retrain/287.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '287m_retrain' (SELECTION stays 470).
examples/nas/legacy/cream/configs/retrain/43.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '43m_retrain' and SELECTION: 43.
examples/nas/legacy/cream/configs/retrain/481.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '481m_retrain' and SELECTION: 481.
examples/nas/legacy/cream/configs/retrain/604.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '604m_retrain' and SELECTION: 604.
examples/nas/legacy/cream/configs/retrain/72.yaml    0 → 100755

Identical to 114.yaml above, except MODEL: '72m_retrain' (SELECTION stays 470).
examples/nas/legacy/cream/configs/test.yaml    0 → 100644

AUTO_RESUME: True
DATA_DIR: './data/imagenet'
MODEL: 'Childnet_Testing'
RESUME_PATH: './experiments/workspace/ckps/42.pth.tar'
SAVE_PATH: './'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 4
NUM_GPU: 2
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0

DATASET:
  NUM_CLASSES: 1000
  IMAGE_SIZE: 224  # image patch size
  INTERPOLATION: 'bilinear'  # Image resize interpolation type
  BATCH_SIZE: 32  # batch size
  NO_PREFECHTER: False

NET:
  GP: 'avg'
  DROPOUT_RATE: 0.0
  SELECTION: 42

  EMA:
    USE: True
    FORCE_CPU: False  # force model ema to be tracked on CPU
    DECAY: 0.9998

OPTIMIZER:
  MOMENTUM: 0.9
  WEIGHT_DECAY: 1e-3
examples/nas/legacy/cream/configs/train.yaml    0 → 100644

AUTO_RESUME: False
DATA_DIR: './data/imagenet'
MODEL: 'Supernet_Training'
RESUME_PATH: './experiments/workspace/train/resume.pth.tar'
SAVE_PATH: './'
SEED: 42
LOG_INTERVAL: 50
RECOVERY_INTERVAL: 0
WORKERS: 8
NUM_GPU: 8
SAVE_IMAGES: False
AMP: False
OUTPUT: 'None'
EVAL_METRICS: 'prec1'
TTA: 0
LOCAL_RANK: 0

DATASET:
  NUM_CLASSES: 1000
  IMAGE_SIZE: 224  # image patch size
  INTERPOLATION: 'bilinear'  # Image resize interpolation type
  BATCH_SIZE: 128  # batch size

NET:
  GP: 'avg'
  DROPOUT_RATE: 0.0

  EMA:
    USE: True
    FORCE_CPU: False  # force model ema to be tracked on CPU
    DECAY: 0.9998

OPT: 'sgd'
LR: 1.0
EPOCHS: 120
META_LR: 1e-4

BATCHNORM:
  SYNC_BN: False

SUPERNET:
  UPDATE_ITER: 200
  SLICE: 4
  POOL_SIZE: 10
  RESUNIT: False
  DIL_CONV: False
  UPDATE_2ND: True
  FLOPS_MINIMUM: 0
  FLOPS_MAXIMUM: 600
  PICK_METHOD: 'meta'
  META_STA_EPOCH: 20
  HOW_TO_PROB: 'pre_prob'
  PRE_PROB: (0.05,0.2,0.05,0.5,0.05,0.15)
examples/nas/legacy/cream/lib/config.py    0 → 100644

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from yacs.config import CfgNode as CN

DEFAULT_CROP_PCT = 0.875
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)

__C = CN()

cfg = __C

__C.AUTO_RESUME = True
__C.DATA_DIR = './data/imagenet'
__C.MODEL = 'cream'
__C.RESUME_PATH = './experiments/ckps/resume.pth.tar'
__C.SAVE_PATH = './experiments/ckps/'
__C.SEED = 42
__C.LOG_INTERVAL = 50
__C.RECOVERY_INTERVAL = 0
__C.WORKERS = 4
__C.NUM_GPU = 1
__C.SAVE_IMAGES = False
__C.AMP = False
__C.ACC_GAP = 5
__C.OUTPUT = 'output/path/'
__C.EVAL_METRICS = 'prec1'
__C.TTA = 0  # Test or inference time augmentation
__C.LOCAL_RANK = 0
__C.VERBOSE = False

# dataset configs
__C.DATASET = CN()
__C.DATASET.NUM_CLASSES = 1000
__C.DATASET.IMAGE_SIZE = 224  # image patch size
__C.DATASET.INTERPOLATION = 'bilinear'  # Image resize interpolation type
__C.DATASET.BATCH_SIZE = 32  # batch size
__C.DATASET.NO_PREFECHTER = False
__C.DATASET.PIN_MEM = True
__C.DATASET.VAL_BATCH_MUL = 4

# model configs
__C.NET = CN()
__C.NET.SELECTION = 14
__C.NET.GP = 'avg'  # type of global pool ["avg", "max", "avgmax", "avgmaxc"]
__C.NET.DROPOUT_RATE = 0.0  # dropout rate

__C.NET.INPUT_ARCH = [[0], [3], [3, 3], [3, 1, 3], [3, 3, 3, 3], [3, 3, 3], [0]]

# model ema parameters
__C.NET.EMA = CN()
__C.NET.EMA.USE = True
__C.NET.EMA.FORCE_CPU = False  # force model ema to be tracked on CPU
__C.NET.EMA.DECAY = 0.9998

# optimizer configs
__C.OPT = 'sgd'
__C.OPT_EPS = 1e-2
__C.MOMENTUM = 0.9
__C.WEIGHT_DECAY = 1e-4
__C.OPTIMIZER = CN()
__C.OPTIMIZER.NAME = 'sgd'
__C.OPTIMIZER.MOMENTUM = 0.9
__C.OPTIMIZER.WEIGHT_DECAY = 1e-3

# scheduler configs
__C.SCHED = 'sgd'
__C.LR_NOISE = None
__C.LR_NOISE_PCT = 0.67
__C.LR_NOISE_STD = 1.0
__C.WARMUP_LR = 1e-4
__C.MIN_LR = 1e-5
__C.EPOCHS = 200
__C.START_EPOCH = None
__C.DECAY_EPOCHS = 30.0
__C.WARMUP_EPOCHS = 3
__C.COOLDOWN_EPOCHS = 10
__C.PATIENCE_EPOCHS = 10
__C.DECAY_RATE = 0.1
__C.LR = 1e-2
__C.META_LR = 1e-4

# data augmentation parameters
__C.AUGMENTATION = CN()
__C.AUGMENTATION.AA = 'rand-m9-mstd0.5'
__C.AUGMENTATION.COLOR_JITTER = 0.4
__C.AUGMENTATION.RE_PROB = 0.2  # random erase prob
__C.AUGMENTATION.RE_MODE = 'pixel'  # random erase mode
__C.AUGMENTATION.MIXUP = 0.0  # mixup alpha
__C.AUGMENTATION.MIXUP_OFF_EPOCH = 0  # turn off mixup after this epoch
__C.AUGMENTATION.SMOOTHING = 0.1  # label smoothing parameters

# batch norm parameters (only works with gen_efficientnet based models
# currently)
__C.BATCHNORM = CN()
__C.BATCHNORM.SYNC_BN = False
__C.BATCHNORM.BN_TF = False
__C.BATCHNORM.BN_MOMENTUM = 0.1  # batchnorm momentum override
__C.BATCHNORM.BN_EPS = 1e-5  # batchnorm eps override

# supernet training hyperparameters
__C.SUPERNET = CN()
__C.SUPERNET.UPDATE_ITER = 1300
__C.SUPERNET.SLICE = 4
__C.SUPERNET.POOL_SIZE = 10
__C.SUPERNET.RESUNIT = False
__C.SUPERNET.DIL_CONV = False
__C.SUPERNET.UPDATE_2ND = True
__C.SUPERNET.FLOPS_MAXIMUM = 600
__C.SUPERNET.FLOPS_MINIMUM = 0
__C.SUPERNET.PICK_METHOD = 'meta'  # pick teacher method
__C.SUPERNET.META_STA_EPOCH = 20  # start using meta picking method
__C.SUPERNET.HOW_TO_PROB = 'pre_prob'  # sample method
__C.SUPERNET.PRE_PROB = (0.05, 0.2, 0.05, 0.5, 0.05, 0.15)  # sample prob in 'pre_prob'
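The YAML files above are meant to override the defaults defined in this yacs `cfg` object. A minimal sketch of such a merge using the standard yacs API follows; the script itself and the chosen YAML path are illustrative rather than part of this commit, and it assumes the working directory is the example root so that `lib` is importable.

    # Sketch: merge one of the retrain configs into the default yacs config.
    from lib.config import cfg

    cfg.merge_from_file('./configs/retrain/14.yaml')  # YAML values override the defaults above
    cfg.freeze()                                      # lock the config against further edits

    print(cfg.MODEL, cfg.NET.SELECTION, cfg.DATASET.BATCH_SIZE)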
examples/nas/legacy/cream/lib/core/retrain.py    0 → 100644

import os
import time
import torch
import torchvision

from collections import OrderedDict

from lib.utils.util import AverageMeter, accuracy, reduce_tensor


def train_epoch(
        epoch, model, loader, optimizer, loss_fn, cfg,
        lr_scheduler=None, saver=None, output_dir='', use_amp=False,
        model_ema=None, logger=None, writer=None, local_rank=0):
    batch_time_m = AverageMeter()
    data_time_m = AverageMeter()
    losses_m = AverageMeter()
    prec1_m = AverageMeter()
    prec5_m = AverageMeter()

    model.train()

    end = time.time()
    last_idx = len(loader) - 1
    num_updates = epoch * len(loader)
    optimizer.zero_grad()
    for batch_idx, (input, target) in enumerate(loader):
        last_batch = batch_idx == last_idx
        data_time_m.update(time.time() - end)

        input = input.cuda()
        target = target.cuda()

        output = model(input)
        loss = loss_fn(output, target)

        prec1, prec5 = accuracy(output, target, topk=(1, 5))

        if cfg.NUM_GPU > 1:
            reduced_loss = reduce_tensor(loss.data, cfg.NUM_GPU)
            prec1 = reduce_tensor(prec1, cfg.NUM_GPU)
            prec5 = reduce_tensor(prec5, cfg.NUM_GPU)
        else:
            reduced_loss = loss.data

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        torch.cuda.synchronize()

        losses_m.update(reduced_loss.item(), input.size(0))
        prec1_m.update(prec1.item(), output.size(0))
        prec5_m.update(prec5.item(), output.size(0))

        if model_ema is not None:
            model_ema.update(model)
        num_updates += 1

        batch_time_m.update(time.time() - end)
        if last_batch or batch_idx % cfg.LOG_INTERVAL == 0:
            lrl = [param_group['lr'] for param_group in optimizer.param_groups]
            lr = sum(lrl) / len(lrl)

            if local_rank == 0:
                logger.info(
                    'Train: {} [{:>4d}/{}] '
                    'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) '
                    'Prec@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) '
                    'Prec@5: {top5.val:>7.4f} ({top5.avg:>7.4f}) '
                    'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s '
                    '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                    'LR: {lr:.3e}'
                    'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format(
                        epoch, batch_idx, len(loader),
                        loss=losses_m,
                        top1=prec1_m,
                        top5=prec5_m,
                        batch_time=batch_time_m,
                        rate=input.size(0) * cfg.NUM_GPU / batch_time_m.val,
                        rate_avg=input.size(0) * cfg.NUM_GPU / batch_time_m.avg,
                        lr=lr,
                        data_time=data_time_m))

                writer.add_scalar(
                    'Loss/train',
                    prec1_m.avg,
                    epoch * len(loader) + batch_idx)
                writer.add_scalar(
                    'Accuracy/train',
                    prec1_m.avg,
                    epoch * len(loader) + batch_idx)
                writer.add_scalar(
                    'Learning_Rate',
                    optimizer.param_groups[0]['lr'],
                    epoch * len(loader) + batch_idx)

                if cfg.SAVE_IMAGES and output_dir:
                    torchvision.utils.save_image(
                        input,
                        os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx),
                        padding=0,
                        normalize=True)

        if saver is not None and cfg.RECOVERY_INTERVAL and (
                last_batch or (batch_idx + 1) % cfg.RECOVERY_INTERVAL == 0):
            saver.save_recovery(
                model, optimizer, cfg, epoch,
                model_ema=model_ema, use_amp=use_amp, batch_idx=batch_idx)

        if lr_scheduler is not None:
            lr_scheduler.step_update(
                num_updates=num_updates, metric=losses_m.avg)

        end = time.time()
        # end for

    if hasattr(optimizer, 'sync_lookahead'):
        optimizer.sync_lookahead()

    return OrderedDict([('loss', losses_m.avg)])
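A minimal sketch of driving `train_epoch` for one epoch on a toy dataset follows. Everything in it (the tiny model, the random tensors, the logger and writer names) is illustrative only; it assumes a CUDA device, since `train_epoch` moves each batch to the GPU, and it assumes the `lib.utils.util` helpers, which belong to this commit but are not among the 20 diffs shown.

    import logging
    import torch
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset
    from torch.utils.tensorboard import SummaryWriter

    from lib.config import cfg
    from lib.core.retrain import train_epoch

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger('retrain')
    writer = SummaryWriter('./runs/example')

    # tiny stand-in for the ImageNet loader
    data = TensorDataset(torch.randn(64, 3, 224, 224),
                         torch.randint(0, 1000, (64,)))
    loader = DataLoader(data, batch_size=16)

    # toy classifier standing in for a retrained child network
    model = nn.Sequential(
        nn.Conv2d(3, 8, kernel_size=3, stride=2),
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        nn.Linear(8, 1000)).cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=cfg.LR, momentum=cfg.MOMENTUM)
    loss_fn = nn.CrossEntropyLoss().cuda()

    metrics = train_epoch(0, model, loader, optimizer, loss_fn, cfg,
                          logger=logger, writer=writer)
    print(metrics['loss'])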
examples/nas/legacy/cream/lib/core/test.py    0 → 100644

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com

import time
import torch

from collections import OrderedDict

from lib.utils.util import AverageMeter, accuracy, reduce_tensor


def validate(epoch, model, loader, loss_fn, cfg, log_suffix='',
             logger=None, writer=None, local_rank=0):
    batch_time_m = AverageMeter()
    losses_m = AverageMeter()
    prec1_m = AverageMeter()
    prec5_m = AverageMeter()

    model.eval()

    end = time.time()
    last_idx = len(loader) - 1
    with torch.no_grad():
        for batch_idx, (input, target) in enumerate(loader):
            last_batch = batch_idx == last_idx

            output = model(input)
            if isinstance(output, (tuple, list)):
                output = output[0]

            # augmentation reduction
            reduce_factor = cfg.TTA
            if reduce_factor > 1:
                output = output.unfold(
                    0, reduce_factor, reduce_factor).mean(dim=2)
                target = target[0:target.size(0):reduce_factor]

            loss = loss_fn(output, target)
            prec1, prec5 = accuracy(output, target, topk=(1, 5))

            if cfg.NUM_GPU > 1:
                reduced_loss = reduce_tensor(loss.data, cfg.NUM_GPU)
                prec1 = reduce_tensor(prec1, cfg.NUM_GPU)
                prec5 = reduce_tensor(prec5, cfg.NUM_GPU)
            else:
                reduced_loss = loss.data

            torch.cuda.synchronize()

            losses_m.update(reduced_loss.item(), input.size(0))
            prec1_m.update(prec1.item(), output.size(0))
            prec5_m.update(prec5.item(), output.size(0))

            batch_time_m.update(time.time() - end)
            end = time.time()
            if local_rank == 0 and (
                    last_batch or batch_idx % cfg.LOG_INTERVAL == 0):
                log_name = 'Test' + log_suffix
                logger.info(
                    '{0}: [{1:>4d}/{2}] '
                    'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                    'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
                    'Prec@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) '
                    'Prec@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format(
                        log_name, batch_idx, last_idx,
                        batch_time=batch_time_m,
                        loss=losses_m,
                        top1=prec1_m,
                        top5=prec5_m))

                writer.add_scalar(
                    'Loss' + log_suffix + '/vaild',
                    prec1_m.avg,
                    epoch * len(loader) + batch_idx)
                writer.add_scalar(
                    'Accuracy' + log_suffix + '/vaild',
                    prec1_m.avg,
                    epoch * len(loader) + batch_idx)

    metrics = OrderedDict(
        [('loss', losses_m.avg),
         ('prec1', prec1_m.avg),
         ('prec5', prec5_m.avg)])

    return metrics
examples/nas/legacy/cream/lib/models/blocks/__init__.py    0 → 100644

from lib.models.blocks.residual_block import get_Bottleneck, get_BasicBlock
from lib.models.blocks.inverted_residual_block import InvertedResidual
examples/nas/legacy/cream/lib/models/blocks/inverted_residual_block.py    0 → 100644

# This file is downloaded from
# https://github.com/rwightman/pytorch-image-models

import torch.nn as nn

from timm.models.layers import create_conv2d
from timm.models.efficientnet_blocks import make_divisible, resolve_se_args, \
    SqueezeExcite, drop_path


class InvertedResidual(nn.Module):
    """ Inverted residual block w/ optional SE and CondConv routing"""

    def __init__(self, in_chs, out_chs, dw_kernel_size=3,
                 stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
                 exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
                 se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
                 conv_kwargs=None, drop_path_rate=0.):
        super(InvertedResidual, self).__init__()
        norm_kwargs = norm_kwargs or {}
        conv_kwargs = conv_kwargs or {}
        mid_chs = make_divisible(in_chs * exp_ratio)
        has_se = se_ratio is not None and se_ratio > 0.
        self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
        self.drop_path_rate = drop_path_rate

        # Point-wise expansion
        self.conv_pw = create_conv2d(
            in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs)
        self.bn1 = norm_layer(mid_chs, **norm_kwargs)
        self.act1 = act_layer(inplace=True)

        # Depth-wise convolution
        self.conv_dw = create_conv2d(
            mid_chs, mid_chs, dw_kernel_size, stride=stride, dilation=dilation,
            padding=pad_type, depthwise=True, **conv_kwargs)
        self.bn2 = norm_layer(mid_chs, **norm_kwargs)
        self.act2 = act_layer(inplace=True)

        # Squeeze-and-excitation
        if has_se:
            se_kwargs = resolve_se_args(se_kwargs, in_chs, act_layer)
            self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, **se_kwargs)
        else:
            self.se = None

        # Point-wise linear projection
        self.conv_pwl = create_conv2d(
            mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs)
        self.bn3 = norm_layer(out_chs, **norm_kwargs)

    def feature_info(self, location):
        if location == 'expansion':
            # after SE, input to PWL
            info = dict(module='conv_pwl', hook_type='forward_pre',
                        num_chs=self.conv_pwl.in_channels)
        else:
            # location == 'bottleneck', block output
            info = dict(module='', hook_type='',
                        num_chs=self.conv_pwl.out_channels)
        return info

    def forward(self, x):
        residual = x

        # Point-wise expansion
        x = self.conv_pw(x)
        x = self.bn1(x)
        x = self.act1(x)

        # Depth-wise convolution
        x = self.conv_dw(x)
        x = self.bn2(x)
        x = self.act2(x)

        # Squeeze-and-excitation
        if self.se is not None:
            x = self.se(x)

        # Point-wise linear projection
        x = self.conv_pwl(x)
        x = self.bn3(x)

        if self.has_residual:
            if self.drop_path_rate > 0.:
                x = drop_path(x, self.drop_path_rate, self.training)
            x += residual

        return x
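A minimal sketch of a standalone forward pass through `InvertedResidual` follows; the channel counts and input shape are arbitrary, and it assumes a timm release that still exposes `resolve_se_args` in `timm.models.efficientnet_blocks`, as the imports above require.

    import torch
    from lib.models.blocks.inverted_residual_block import InvertedResidual

    # in_chs == out_chs and stride == 1, so the residual shortcut is active
    block = InvertedResidual(in_chs=32, out_chs=32, dw_kernel_size=3,
                             stride=1, exp_ratio=6.0, se_ratio=0.25)
    block.eval()

    x = torch.randn(1, 32, 56, 56)
    with torch.no_grad():
        y = block(x)
    print(y.shape)  # torch.Size([1, 32, 56, 56])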
examples/nas/legacy/cream/lib/models/blocks/residual_block.py    0 → 100644

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Written by Hao Du and Houwen Peng
# email: haodu8-c@my.cityu.edu.hk and houwen.peng@microsoft.com

import torch
import torch.nn as nn
import torch.nn.functional as F


def conv3x3(in_planes, out_planes, stride=1):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3,
                     stride=stride, padding=1, bias=True)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    def __init__(self, inplanes, planes, stride=1, expansion=4):
        super(Bottleneck, self).__init__()
        planes = int(planes / expansion)
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=True)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(
            planes, planes * expansion, kernel_size=1, bias=True)
        self.bn3 = nn.BatchNorm2d(planes * expansion)
        self.relu = nn.ReLU(inplace=True)
        self.stride = stride
        self.expansion = expansion
        if inplanes != planes * self.expansion:
            self.downsample = nn.Sequential(
                nn.Conv2d(inplanes, planes * self.expansion,
                          kernel_size=1, stride=stride, bias=True),
                nn.BatchNorm2d(planes * self.expansion),
            )
        else:
            self.downsample = None

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


def get_Bottleneck(in_c, out_c, stride):
    return Bottleneck(in_c, out_c, stride=stride)


def get_BasicBlock(in_c, out_c, stride):
    return BasicBlock(in_c, out_c, stride=stride)
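A minimal sketch exercising the two helpers defined above; the tensor shapes are arbitrary, and importing through the `lib.models.blocks` package still pulls in the timm-based inverted residual block via its `__init__.py`, so timm must be installed.

    import torch
    from lib.models.blocks.residual_block import get_BasicBlock, get_Bottleneck

    basic = get_BasicBlock(64, 64, 1)    # keeps channel count and resolution
    bottle = get_Bottleneck(64, 128, 2)  # 128 output channels, stride 2, downsample branch active

    x = torch.randn(1, 64, 32, 32)
    print(basic(x).shape)   # torch.Size([1, 64, 32, 32])
    print(bottle(x).shape)  # torch.Size([1, 128, 16, 16])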
examples/nas/legacy/cream/lib/models/builders/build_childnet.py    0 → 100755

from lib.utils.util import *

from timm.models.efficientnet_blocks import *


class ChildNetBuilder:
    def __init__(
            self,
            channel_multiplier=1.0,
            channel_divisor=8,
            channel_min=None,
            output_stride=32,
            pad_type='',
            act_layer=None,
            se_kwargs=None,
            norm_layer=nn.BatchNorm2d,
            norm_kwargs=None,
            drop_path_rate=0.,
            feature_location='',
            verbose=False,
            logger=None):
        self.channel_multiplier = channel_multiplier
        self.channel_divisor = channel_divisor
        self.channel_min = channel_min
        self.output_stride = output_stride
        self.pad_type = pad_type
        self.act_layer = act_layer
        self.se_kwargs = se_kwargs
        self.norm_layer = norm_layer
        self.norm_kwargs = norm_kwargs
        self.drop_path_rate = drop_path_rate
        self.feature_location = feature_location
        assert feature_location in ('pre_pwl', 'post_exp', '')
        self.verbose = verbose
        self.in_chs = None
        self.features = OrderedDict()
        self.logger = logger

    def _round_channels(self, chs):
        return round_channels(
            chs, self.channel_multiplier, self.channel_divisor, self.channel_min)

    def _make_block(self, ba, block_idx, block_count):
        drop_path_rate = self.drop_path_rate * block_idx / block_count
        bt = ba.pop('block_type')
        ba['in_chs'] = self.in_chs
        ba['out_chs'] = self._round_channels(ba['out_chs'])
        if 'fake_in_chs' in ba and ba['fake_in_chs']:
            ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs'])
        ba['norm_layer'] = self.norm_layer
        ba['norm_kwargs'] = self.norm_kwargs
        ba['pad_type'] = self.pad_type
        # block act fn overrides the model default
        ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
        assert ba['act_layer'] is not None
        if bt == 'ir':
            ba['drop_path_rate'] = drop_path_rate
            ba['se_kwargs'] = self.se_kwargs
            if self.verbose:
                self.logger.info(
                    ' InvertedResidual {}, Args: {}'.format(block_idx, str(ba)))
            block = InvertedResidual(**ba)
        elif bt == 'ds' or bt == 'dsa':
            ba['drop_path_rate'] = drop_path_rate
            ba['se_kwargs'] = self.se_kwargs
            if self.verbose:
                self.logger.info(
                    ' DepthwiseSeparable {}, Args: {}'.format(block_idx, str(ba)))
            block = DepthwiseSeparableConv(**ba)
        elif bt == 'cn':
            if self.verbose:
                self.logger.info(
                    ' ConvBnAct {}, Args: {}'.format(block_idx, str(ba)))
            block = ConvBnAct(**ba)
        else:
            assert False, 'Uknkown block type (%s) while building model.' % bt
        self.in_chs = ba['out_chs']  # update in_chs for arg of next block
        return block

    def __call__(self, in_chs, model_block_args):
        """ Build the blocks
        Args:
            in_chs: Number of input-channels passed to first block
            model_block_args: A list of lists, outer list defines stages, inner
                list contains strings defining block configuration(s)
        Return:
             List of block stacks (each stack wrapped in nn.Sequential)
        """
        if self.verbose:
            self.logger.info(
                'Building model trunk with %d stages...' % len(model_block_args))
        self.in_chs = in_chs
        total_block_count = sum([len(x) for x in model_block_args])
        total_block_idx = 0
        current_stride = 2
        current_dilation = 1
        feature_idx = 0
        stages = []

        # outer list of block_args defines the stacks ('stages' by some
        # conventions)
        for stage_idx, stage_block_args in enumerate(model_block_args):
            last_stack = stage_idx == (len(model_block_args) - 1)
            if self.verbose:
                self.logger.info('Stack: {}'.format(stage_idx))
            assert isinstance(stage_block_args, list)

            blocks = []
            # each stack (stage) contains a list of block arguments
            for block_idx, block_args in enumerate(stage_block_args):
                last_block = block_idx == (len(stage_block_args) - 1)
                extract_features = ''  # No features extracted
                if self.verbose:
                    self.logger.info(' Block: {}'.format(block_idx))

                # Sort out stride, dilation, and feature extraction details
                assert block_args['stride'] in (1, 2)
                if block_idx >= 1:
                    # only the first block in any stack can have a stride > 1
                    block_args['stride'] = 1

                do_extract = False
                if self.feature_location == 'pre_pwl':
                    if last_block:
                        next_stage_idx = stage_idx + 1
                        if next_stage_idx >= len(model_block_args):
                            do_extract = True
                        else:
                            do_extract = model_block_args[next_stage_idx][0]['stride'] > 1
                elif self.feature_location == 'post_exp':
                    if block_args['stride'] > 1 or (last_stack and last_block):
                        do_extract = True
                if do_extract:
                    extract_features = self.feature_location

                next_dilation = current_dilation
                if block_args['stride'] > 1:
                    next_output_stride = current_stride * block_args['stride']
                    if next_output_stride > self.output_stride:
                        next_dilation = current_dilation * block_args['stride']
                        block_args['stride'] = 1
                        if self.verbose:
                            self.logger.info(
                                ' Converting stride to dilation to maintain output_stride=={}'.format(
                                    self.output_stride))
                    else:
                        current_stride = next_output_stride
                block_args['dilation'] = current_dilation
                if next_dilation != current_dilation:
                    current_dilation = next_dilation

                # create the block
                block = self._make_block(block_args, total_block_idx, total_block_count)
                blocks.append(block)

                # stash feature module name and channel info for model feature
                # extraction
                if extract_features:
                    feature_module = block.feature_module(extract_features)
                    if feature_module:
                        feature_module = 'blocks.{}.{}.'.format(
                            stage_idx, block_idx) + feature_module
                    feature_channels = block.feature_channels(extract_features)
                    self.features[feature_idx] = dict(
                        name=feature_module,
                        num_chs=feature_channels)
                    feature_idx += 1

                total_block_idx += 1  # incr global block idx (across all stacks)
            stages.append(nn.Sequential(*blocks))
        return stages
examples/nas/legacy/cream/lib/models/builders/build_supernet.py    0 → 100644

from copy import deepcopy

from lib.utils.builder_util import modify_block_args
from lib.models.blocks import get_Bottleneck, InvertedResidual

from timm.models.efficientnet_blocks import *

from nni.nas.pytorch import mutables


class SuperNetBuilder:
    """ Build Trunk Blocks
    """

    def __init__(
            self,
            choices,
            channel_multiplier=1.0,
            channel_divisor=8,
            channel_min=None,
            output_stride=32,
            pad_type='',
            act_layer=None,
            se_kwargs=None,
            norm_layer=nn.BatchNorm2d,
            norm_kwargs=None,
            drop_path_rate=0.,
            feature_location='',
            verbose=False,
            resunit=False,
            dil_conv=False,
            logger=None):
        # dict
        # choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}
        self.choices = [[x, y] for x in choices['kernel_size']
                        for y in choices['exp_ratio']]
        self.choices_num = len(self.choices) - 1
        self.channel_multiplier = channel_multiplier
        self.channel_divisor = channel_divisor
        self.channel_min = channel_min
        self.output_stride = output_stride
        self.pad_type = pad_type
        self.act_layer = act_layer
        self.se_kwargs = se_kwargs
        self.norm_layer = norm_layer
        self.norm_kwargs = norm_kwargs
        self.drop_path_rate = drop_path_rate
        self.feature_location = feature_location
        assert feature_location in ('pre_pwl', 'post_exp', '')
        self.verbose = verbose
        self.resunit = resunit
        self.dil_conv = dil_conv
        self.logger = logger

        # state updated during build, consumed by model
        self.in_chs = None

    def _round_channels(self, chs):
        return round_channels(
            chs, self.channel_multiplier, self.channel_divisor, self.channel_min)

    def _make_block(
            self,
            ba,
            choice_idx,
            block_idx,
            block_count,
            resunit=False,
            dil_conv=False):
        drop_path_rate = self.drop_path_rate * block_idx / block_count
        bt = ba.pop('block_type')
        ba['in_chs'] = self.in_chs
        ba['out_chs'] = self._round_channels(ba['out_chs'])
        if 'fake_in_chs' in ba and ba['fake_in_chs']:
            # FIXME this is a hack to work around mismatch in origin impl input
            # filters
            ba['fake_in_chs'] = self._round_channels(ba['fake_in_chs'])
        ba['norm_layer'] = self.norm_layer
        ba['norm_kwargs'] = self.norm_kwargs
        ba['pad_type'] = self.pad_type
        # block act fn overrides the model default
        ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
        assert ba['act_layer'] is not None
        if bt == 'ir':
            ba['drop_path_rate'] = drop_path_rate
            ba['se_kwargs'] = self.se_kwargs
            if self.verbose:
                self.logger.info(
                    ' InvertedResidual {}, Args: {}'.format(block_idx, str(ba)))
            block = InvertedResidual(**ba)
        elif bt == 'ds' or bt == 'dsa':
            ba['drop_path_rate'] = drop_path_rate
            ba['se_kwargs'] = self.se_kwargs
            if self.verbose:
                self.logger.info(
                    ' DepthwiseSeparable {}, Args: {}'.format(block_idx, str(ba)))
            block = DepthwiseSeparableConv(**ba)
        elif bt == 'cn':
            if self.verbose:
                self.logger.info(
                    ' ConvBnAct {}, Args: {}'.format(block_idx, str(ba)))
            block = ConvBnAct(**ba)
        else:
            assert False, 'Uknkown block type (%s) while building model.' % bt
        if choice_idx == self.choice_num - 1:
            self.in_chs = ba['out_chs']  # update in_chs for arg of next block

        return block

    def __call__(self, in_chs, model_block_args):
        """ Build the blocks
        Args:
            in_chs: Number of input-channels passed to first block
            model_block_args: A list of lists, outer list defines stages, inner
                list contains strings defining block configuration(s)
        Return:
             List of block stacks (each stack wrapped in nn.Sequential)
        """
        if self.verbose:
            logging.info(
                'Building model trunk with %d stages...' % len(model_block_args))
        self.in_chs = in_chs
        total_block_count = sum([len(x) for x in model_block_args])
        total_block_idx = 0
        current_stride = 2
        current_dilation = 1
        feature_idx = 0
        stages = []
        # outer list of block_args defines the stacks ('stages' by some conventions)
        for stage_idx, stage_block_args in enumerate(model_block_args):
            last_stack = stage_idx == (len(model_block_args) - 1)
            if self.verbose:
                self.logger.info('Stack: {}'.format(stage_idx))
            assert isinstance(stage_block_args, list)

            # blocks = []
            # each stack (stage) contains a list of block arguments
            for block_idx, block_args in enumerate(stage_block_args):
                last_block = block_idx == (len(stage_block_args) - 1)
                if self.verbose:
                    self.logger.info(' Block: {}'.format(block_idx))

                # Sort out stride, dilation, and feature extraction details
                assert block_args['stride'] in (1, 2)
                if block_idx >= 1:
                    # only the first block in any stack can have a stride > 1
                    block_args['stride'] = 1

                next_dilation = current_dilation
                if block_args['stride'] > 1:
                    next_output_stride = current_stride * block_args['stride']
                    if next_output_stride > self.output_stride:
                        next_dilation = current_dilation * block_args['stride']
                        block_args['stride'] = 1
                    else:
                        current_stride = next_output_stride
                block_args['dilation'] = current_dilation
                if next_dilation != current_dilation:
                    current_dilation = next_dilation

                if stage_idx == 0 or stage_idx == 6:
                    self.choice_num = 1
                else:
                    self.choice_num = len(self.choices)

                    if self.dil_conv:
                        self.choice_num += 2

                choice_blocks = []
                block_args_copy = deepcopy(block_args)
                if self.choice_num == 1:
                    # create the block
                    block = self._make_block(
                        block_args, 0, total_block_idx, total_block_count)
                    choice_blocks.append(block)
                else:
                    for choice_idx, choice in enumerate(self.choices):
                        # create the block
                        block_args = deepcopy(block_args_copy)
                        block_args = modify_block_args(
                            block_args, choice[0], choice[1])
                        block = self._make_block(
                            block_args, choice_idx, total_block_idx, total_block_count)
                        choice_blocks.append(block)
                    if self.dil_conv:
                        block_args = deepcopy(block_args_copy)
                        block_args = modify_block_args(block_args, 3, 0)
                        block = self._make_block(
                            block_args,
                            self.choice_num - 2,
                            total_block_idx,
                            total_block_count,
                            resunit=self.resunit,
                            dil_conv=self.dil_conv)
                        choice_blocks.append(block)

                        block_args = deepcopy(block_args_copy)
                        block_args = modify_block_args(block_args, 5, 0)
                        block = self._make_block(
                            block_args,
                            self.choice_num - 1,
                            total_block_idx,
                            total_block_count,
                            resunit=self.resunit,
                            dil_conv=self.dil_conv)
                        choice_blocks.append(block)

                    if self.resunit:
                        block = get_Bottleneck(
                            block.conv_pw.in_channels,
                            block.conv_pwl.out_channels,
                            block.conv_dw.stride[0])
                        choice_blocks.append(block)

                choice_block = mutables.LayerChoice(choice_blocks)
                stages.append(choice_block)
                # create the block
                # block = self._make_block(block_args, total_block_idx, total_block_count)
                total_block_idx += 1  # incr global block idx (across all stacks)

            # stages.append(blocks)
        return stages
examples/nas/legacy/cream/lib/models/structures/childnet.py    0 → 100755

from lib.utils.builder_util import *
from lib.models.builders.build_childnet import *

from timm.models.layers import SelectAdaptivePool2d
from timm.models.layers.activations import hard_sigmoid


class ChildNet(nn.Module):

    def __init__(
            self,
            block_args,
            num_classes=1000,
            in_chans=3,
            stem_size=16,
            num_features=1280,
            head_bias=True,
            channel_multiplier=1.0,
            pad_type='',
            act_layer=nn.ReLU,
            drop_rate=0.,
            drop_path_rate=0.,
            se_kwargs=None,
            norm_layer=nn.BatchNorm2d,
            norm_kwargs=None,
            global_pool='avg',
            logger=None,
            verbose=False):
        super(ChildNet, self).__init__()

        self.num_classes = num_classes
        self.num_features = num_features
        self.drop_rate = drop_rate
        self._in_chs = in_chans
        self.logger = logger

        # Stem
        stem_size = round_channels(stem_size, channel_multiplier)
        self.conv_stem = create_conv2d(
            self._in_chs, stem_size, 3, stride=2, padding=pad_type)
        self.bn1 = norm_layer(stem_size, **norm_kwargs)
        self.act1 = act_layer(inplace=True)
        self._in_chs = stem_size

        # Middle stages (IR/ER/DS Blocks)
        builder = ChildNetBuilder(
            channel_multiplier, 8, None, 32, pad_type, act_layer, se_kwargs,
            norm_layer, norm_kwargs, drop_path_rate, verbose=verbose)
        self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
        # self.blocks = builder(self._in_chs, block_args)
        self._in_chs = builder.in_chs

        # Head + Pooling
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        self.conv_head = create_conv2d(
            self._in_chs, self.num_features, 1, padding=pad_type, bias=head_bias)
        self.act2 = act_layer(inplace=True)

        # Classifier
        self.classifier = nn.Linear(
            self.num_features * self.global_pool.feat_mult(), self.num_classes)

        efficientnet_init_weights(self)

    def get_classifier(self):
        return self.classifier

    def reset_classifier(self, num_classes, global_pool='avg'):
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        self.num_classes = num_classes
        self.classifier = nn.Linear(
            self.num_features * self.global_pool.feat_mult(),
            num_classes) if self.num_classes else None

    def forward_features(self, x):
        # architecture = [[0], [], [], [], [], [0]]
        x = self.conv_stem(x)
        x = self.bn1(x)
        x = self.act1(x)
        x = self.blocks(x)
        x = self.global_pool(x)
        x = self.conv_head(x)
        x = self.act2(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        x = x.flatten(1)
        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)
        x = self.classifier(x)
        return x


def gen_childnet(arch_list, arch_def, **kwargs):
    # arch_list = [[0], [], [], [], [], [0]]
    choices = {'kernel_size': [3, 5, 7], 'exp_ratio': [4, 6]}
    choices_list = [[x, y] for x in choices['kernel_size']
                    for y in choices['exp_ratio']]

    num_features = 1280

    # act_layer = HardSwish
    act_layer = Swish

    new_arch = []
    # change to child arch_def
    for i, (layer_choice, layer_arch) in enumerate(zip(arch_list, arch_def)):
        if len(layer_arch) == 1:
            new_arch.append(layer_arch)
            continue
        else:
            new_layer = []
            for j, (block_choice, block_arch) in enumerate(
                    zip(layer_choice, layer_arch)):
                kernel_size, exp_ratio = choices_list[block_choice]
                elements = block_arch.split('_')
                block_arch = block_arch.replace(
                    elements[2], 'k{}'.format(str(kernel_size)))
                block_arch = block_arch.replace(
                    elements[4], 'e{}'.format(str(exp_ratio)))
                new_layer.append(block_arch)
            new_arch.append(new_layer)

    model_kwargs = dict(
        block_args=decode_arch_def(new_arch),
        num_features=num_features,
        stem_size=16,
        norm_kwargs=resolve_bn_args(kwargs),
        act_layer=act_layer,
        se_kwargs=dict(
            act_layer=nn.ReLU,
            gate_fn=hard_sigmoid,
            reduce_mid=True,
            divisor=8),
        **kwargs,
    )
    model = ChildNet(**model_kwargs)
    return model