Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50v1.5_pytorch
Commits
e129194a
Commit
e129194a
authored
Sep 26, 2023
by
Sugon_ldc
Browse files
add new model resnet50v1.5
parents
Pipeline
#571
failed with stages
in 0 seconds
Changes
197
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3610 additions
and
0 deletions
+3610
-0
image_classification/gpu_affinity.py.bak
image_classification/gpu_affinity.py.bak
+417
-0
image_classification/logger.py
image_classification/logger.py
+495
-0
image_classification/mixup.py
image_classification/mixup.py
+69
-0
image_classification/models/__init__.py
image_classification/models/__init__.py
+24
-0
image_classification/models/__pycache__/__init__.cpython-37.pyc
...classification/models/__pycache__/__init__.cpython-37.pyc
+0
-0
image_classification/models/__pycache__/common.cpython-37.pyc
...e_classification/models/__pycache__/common.cpython-37.pyc
+0
-0
image_classification/models/__pycache__/efficientnet.cpython-37.pyc
...sification/models/__pycache__/efficientnet.cpython-37.pyc
+0
-0
image_classification/models/__pycache__/entrypoints.cpython-37.pyc
...ssification/models/__pycache__/entrypoints.cpython-37.pyc
+0
-0
image_classification/models/__pycache__/model.cpython-37.pyc
image_classification/models/__pycache__/model.cpython-37.pyc
+0
-0
image_classification/models/__pycache__/resnet.cpython-37.pyc
...e_classification/models/__pycache__/resnet.cpython-37.pyc
+0
-0
image_classification/models/common.py
image_classification/models/common.py
+302
-0
image_classification/models/efficientnet.py
image_classification/models/efficientnet.py
+584
-0
image_classification/models/entrypoints.py
image_classification/models/entrypoints.py
+115
-0
image_classification/models/model.py
image_classification/models/model.py
+184
-0
image_classification/models/resnet.py
image_classification/models/resnet.py
+458
-0
image_classification/optimizers.py
image_classification/optimizers.py
+160
-0
image_classification/quantization.py
image_classification/quantization.py
+144
-0
image_classification/smoothing.py
image_classification/smoothing.py
+40
-0
image_classification/training.py
image_classification/training.py
+435
-0
image_classification/utils.py
image_classification/utils.py
+183
-0
No files found.
image_classification/gpu_affinity.py.bak
0 → 100644
View file @
e129194a
import collections
import itertools
import os
import pathlib
import re
import pynvml
from typing import Union
class Device:
    """Thin wrapper around a pynvml GPU handle exposing name/UUID/CPU affinity."""

    # nvml reports CPU affinity as an array of 64-bit bitmask words
    _nvml_bit_affinity = 64

    # number of 64-bit words needed to cover every CPU on this host
    _nvml_affinity_elements = (
        os.cpu_count() + _nvml_bit_affinity - 1
    ) // _nvml_bit_affinity

    def __init__(self, device_idx):
        super().__init__()
        self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_idx)

    def get_name(self):
        return pynvml.nvmlDeviceGetName(self.handle)

    def get_uuid(self):
        return pynvml.nvmlDeviceGetUUID(self.handle)

    def get_cpu_affinity(self):
        """Return the indices of CPU cores with affinity to this GPU."""
        bits = ""
        for mask_word in pynvml.nvmlDeviceGetCpuAffinity(
            self.handle, Device._nvml_affinity_elements
        ):
            # prepend each 64-bit word so the string reads MSB-first
            bits = "{:064b}".format(mask_word) + bits
        # reverse so index 0 of the list corresponds to core 0
        per_core_flags = [int(b) for b in reversed(bits)]
        return [core for core, flag in enumerate(per_core_flags) if flag]
def get_thread_siblings_list():
    """
    Return a list of 2-element integer tuples representing pairs of
    hyperthreading cores, read from sysfs.
    """
    path = "/sys/devices/system/cpu/cpu*/topology/thread_siblings_list"
    pattern = re.compile(r"(\d+)\D(\d+)")
    pairs = set()
    # path[0] is "/" (glob root), path[1:] is the relative glob pattern
    for fname in pathlib.Path(path[0]).glob(path[1:]):
        with open(fname) as f:
            matches = pattern.findall(f.read().strip())
        if matches:
            pairs.add(tuple(sorted(map(int, matches[0]))))
    return list(pairs)
def build_thread_siblings_dict(siblings_list):
    """Map every core index to the sibling tuple that contains it."""
    return {
        core: siblings
        for siblings in siblings_list
        for core in siblings
    }
def group_list_by_dict(affinity, siblings_dict):
    """Group cores from `affinity` into tuples of hyperthread siblings.

    Cores absent from `siblings_dict` form their own single-element group.
    """
    sibling_key = lambda core: siblings_dict.get(core, (core,))
    # groupby requires the input to be sorted by the same key
    ordered = sorted(affinity, key=sibling_key)
    return [tuple(members) for _, members in itertools.groupby(ordered, key=sibling_key)]
def group_affinity_by_siblings(socket_affinities):
    """Convert each per-GPU core list into a list of sibling-core tuples."""
    siblings_dict = build_thread_siblings_dict(get_thread_siblings_list())
    return [
        group_list_by_dict(affinity, siblings_dict)
        for affinity in socket_affinities
    ]
def ungroup_affinities(affinities, cores):
    """Flatten sibling-grouped affinities back into plain core lists.

    cores: 'all_logical' keeps every sibling; 'single_logical' keeps only the
    first core of each sibling group.
    """
    flattened = []
    for grouped in affinities:
        if cores == "all_logical":
            flattened.append(list(itertools.chain(*grouped)))
        elif cores == "single_logical":
            flattened.append([siblings[0] for siblings in grouped])
        else:
            raise RuntimeError("Unknown cores mode")
    return flattened
def check_socket_affinities(socket_affinities):
    """Validate that per-GPU core sets are pairwise identical or disjoint.

    Raises RuntimeError on partial overlap, which would indicate an
    inconsistent NUMA/affinity report.
    """
    for first, second in itertools.product(socket_affinities, repeat=2):
        a, b = set(first), set(second)
        if a != b and not a.isdisjoint(b):
            raise RuntimeError(
                f"Sets of cores should be either identical or disjoint, "
                f"but got {first} and {second}."
            )
def get_socket_affinities(nproc_per_node, exclude_unavailable_cores=True):
    """Return, for each of the first `nproc_per_node` GPUs, its CPU core list.

    When `exclude_unavailable_cores` is True, cores outside the current
    process's scheduling mask are dropped.
    """
    affinities = [Device(idx).get_cpu_affinity() for idx in range(nproc_per_node)]
    if exclude_unavailable_cores:
        usable = os.sched_getaffinity(0)
        affinities = [list(set(cores) & usable) for cores in affinities]
    check_socket_affinities(affinities)
    return affinities
def get_grouped_socket_affinities(nproc_per_node, exclude_unavailable_cores=True):
    """Per-GPU CPU affinities, grouped into hyperthread-sibling tuples."""
    return group_affinity_by_siblings(
        get_socket_affinities(nproc_per_node, exclude_unavailable_cores)
    )
def set_socket_affinity(gpu_id, nproc_per_node, cores):
    """
    Pin the current process to all available physical CPU cores of the socket
    connected to the GPU with the given id.

    Args:
        gpu_id: index of a GPU
        nproc_per_node: number of processes per node
        cores: 'all_logical' or 'single_logical'
    """
    grouped = get_grouped_socket_affinities(nproc_per_node)
    flat = ungroup_affinities(grouped, cores)
    os.sched_setaffinity(0, flat[gpu_id])
def set_socket_single_affinity(gpu_id, nproc_per_node, cores):
    """
    Pin the current process to the first available physical CPU core of the
    socket connected to the GPU with the given id (cores may be shared
    between GPUs).

    Args:
        gpu_id: index of a GPU
        nproc_per_node: number of processes per node
        cores: 'all_logical' or 'single_logical'
    """
    grouped = get_grouped_socket_affinities(nproc_per_node)
    first_group_only = [groups[:1] for groups in grouped]
    flat = ungroup_affinities(first_group_only, cores)
    os.sched_setaffinity(0, flat[gpu_id])
def set_socket_single_unique_affinity(gpu_id, nproc_per_node, cores):
    """
    Pin the current process to a single physical CPU core (from the socket
    connected to its GPU) that is not assigned to any other GPU.

    Args:
        gpu_id: index of a GPU
        nproc_per_node: number of processes per node
        cores: 'all_logical' or 'single_logical'
    """
    grouped = get_grouped_socket_affinities(nproc_per_node)
    chosen = []
    taken = set()
    for socket_groups in grouped:
        # pick the first sibling group on this socket not claimed already
        for group in socket_groups:
            if group not in taken:
                chosen.append([group])
                taken.add(group)
                break
    flat = ungroup_affinities(chosen, cores)
    os.sched_setaffinity(0, flat[gpu_id])
def set_socket_unique_affinity(gpu_id, nproc_per_node, cores, mode, balanced=True):
    """
    Pin the current process to a unique subset of the physical CPU cores from
    the socket connected to its GPU, including hyperthread siblings.

    Args:
        gpu_id: index of a GPU
        nproc_per_node: number of processes per node
        cores: 'all_logical' or 'single_logical'
        mode: 'contiguous' or 'interleaved'
        balanced: assign an equal number of physical cores to each process
    """
    grouped = get_grouped_socket_affinities(nproc_per_node)

    # GPUs sharing the same socket affinity are partitioned together
    affinity_to_device_ids = collections.defaultdict(list)
    for device_id, socket_groups in enumerate(grouped):
        affinity_to_device_ids[tuple(socket_groups)].append(device_id)

    # smallest per-GPU physical-core budget across all sockets; used when
    # balanced == True so every process receives the same number of cores
    min_physical_cores_per_gpu = min(
        len(groups) // len(gpus)
        for groups, gpus in affinity_to_device_ids.items()
    )

    grouped_unique_affinities = [None] * nproc_per_node
    for socket_groups, device_ids in affinity_to_device_ids.items():
        devices_per_group = len(device_ids)
        if balanced:
            cores_per_device = min_physical_cores_per_gpu
            socket_groups = socket_groups[
                : devices_per_group * min_physical_cores_per_gpu
            ]
        else:
            cores_per_device = len(socket_groups) // devices_per_group

        for subgroup_id, device_id in enumerate(device_ids):
            # 'contiguous' vs 'interleaved' should not matter on Intel DGX-1,
            # but 'contiguous' is better on AMD Rome (DGX A100) where 4
            # consecutive cores share an L3 slice. No attempt is made to
            # detect the L3 layout or externally excluded cores.
            if mode == "interleaved":
                unique = list(socket_groups[subgroup_id::devices_per_group])
            elif mode == "contiguous":
                start = subgroup_id * cores_per_device
                unique = list(socket_groups[start : start + cores_per_device])
            else:
                raise RuntimeError("Unknown set_socket_unique_affinity mode")
            grouped_unique_affinities[device_id] = unique

    flat = ungroup_affinities(grouped_unique_affinities, cores)
    os.sched_setaffinity(0, flat[gpu_id])
from enum import Enum, auto


class AffinityMode(Enum):
    """Named CPU-affinity strategies understood by set_affinity()."""

    none = auto()
    socket = auto()
    socket_single = auto()
    socket_single_unique = auto()
    socket_unique_interleaved = auto()
    socket_unique_contiguous = auto()
def set_affinity(
    gpu_id,
    nproc_per_node=None,
    *,
    mode: Union[str, AffinityMode] = AffinityMode.socket_unique_contiguous,
    cores="all_logical",
    balanced=True,
):
    """
    Pin the current process to CPU cores matching the CPU-GPU topology of the
    platform; usually improves and stabilizes DL training performance.

    Assumes multi-process single-device training (one process per GPU, e.g.
    `torch.nn.parallel.DistributedDataParallel`).

    Affinity modes (see AffinityMode):
      * 'socket' - all physical cores of the GPU's socket
      * 'socket_single' - first physical core of the socket (may be shared)
      * 'socket_single_unique' - one physical core, unique per GPU
      * 'socket_unique_interleaved' - unique core subset, interleaved indexing
      * 'socket_unique_contiguous' - (default, recommended on NVIDIA DGX)
        unique core subset, contiguous indexing

    Cores modes:
      * 'all_logical' - include all hyperthread siblings of each physical core
      * 'single_logical' - one logical core per physical core

    Args:
        gpu_id: integer index of a GPU, 0 .. nproc_per_node - 1
        nproc_per_node: number of processes per node (defaults to GPU count)
        mode: affinity mode (AffinityMode or its string name)
        cores: 'all_logical' or 'single_logical'
        balanced: equal number of physical cores per process; affects only
            the 'socket_unique_*' modes

    Returns the set of logical CPU cores the process may now run on.

    Example (one process per GPU, launched via torch.distributed):

        affinity = gpu_affinity.set_affinity(local_rank, torch.cuda.device_count())

    WARNING: on DGX A100 only half of the CPU cores have direct GPU access,
    so this restricts the process to half the cores / memory bandwidth.
    WARNING: Intel OpenMP resets affinity on the first OpenMP call after a
    fork; run with `KMP_AFFINITY=disabled` to preserve the affinity in
    forked workers (e.g. PyTorch DataLoader).
    """
    if not isinstance(mode, AffinityMode):
        mode = AffinityMode[mode]

    pynvml.nvmlInit()
    if nproc_per_node is None:
        nproc_per_node = pynvml.nvmlDeviceGetCount()

    if mode == AffinityMode.none:
        pass
    elif mode == AffinityMode.socket:
        set_socket_affinity(gpu_id, nproc_per_node, cores)
    elif mode == AffinityMode.socket_single:
        set_socket_single_affinity(gpu_id, nproc_per_node, cores)
    elif mode == AffinityMode.socket_single_unique:
        set_socket_single_unique_affinity(gpu_id, nproc_per_node, cores)
    elif mode == AffinityMode.socket_unique_interleaved:
        set_socket_unique_affinity(
            gpu_id, nproc_per_node, cores, "interleaved", balanced
        )
    elif mode == AffinityMode.socket_unique_contiguous:
        set_socket_unique_affinity(
            gpu_id, nproc_per_node, cores, "contiguous", balanced
        )
    else:
        raise RuntimeError("Unknown affinity mode")

    return os.sched_getaffinity(0)
image_classification/logger.py
0 → 100644
View file @
e129194a
# Copyright (c) 2018-2019, NVIDIA CORPORATION
# Copyright (c) 2017- Facebook, Inc
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from
collections
import
OrderedDict
from
numbers
import
Number
import
dllogger
import
numpy
as
np
def format_step(step):
    """Render a dllogger step (string or tuple) as a human-readable prefix.

    Tuple layout: (epoch-or-label[, iteration[, validation iteration]]).
    An empty tuple denotes the end-of-run summary.
    """
    if isinstance(step, str):
        return step

    parts = []
    if len(step) > 0:
        # numeric first element is an epoch number; anything else (e.g.
        # "Calibration") is used verbatim as the label
        if isinstance(step[0], Number):
            parts.append("Epoch: {} ".format(step[0]))
        else:
            parts.append("{} ".format(step[0]))
    if len(step) > 1:
        parts.append("Iteration: {} ".format(step[1]))
    if len(step) > 2:
        parts.append("Validation Iteration: {} ".format(step[2]))

    if len(step) == 0:
        return "Summary:"
    return "".join(parts)
# Factories producing a Meter with (iteration, epoch, run) aggregators
# appropriate for each metric family.


def PERF_METER():
    """Throughput-style metric: averaged at every level."""
    return Meter(AverageMeter(), AverageMeter(), AverageMeter())


def LOSS_METER():
    """Loss metric: averaged per iteration/epoch, best (min) over the run."""
    return Meter(AverageMeter(), AverageMeter(), MinMeter())


def ACC_METER():
    """Accuracy metric: averaged per iteration/epoch, best (max) over the run."""
    return Meter(AverageMeter(), AverageMeter(), MaxMeter())


def LR_METER():
    """Learning rate: only the most recent value matters at every level."""
    return Meter(LastMeter(), LastMeter(), LastMeter())


def LAT_100():
    """Worst-case latency (100th percentile)."""
    return Meter(QuantileMeter(1), QuantileMeter(1), QuantileMeter(1))


def LAT_99():
    """99th-percentile latency."""
    return Meter(QuantileMeter(0.99), QuantileMeter(0.99), QuantileMeter(0.99))


def LAT_95():
    """95th-percentile latency."""
    return Meter(QuantileMeter(0.95), QuantileMeter(0.95), QuantileMeter(0.95))
class Meter(object):
    """Three-level metric accumulator.

    Values recorded per iteration roll up into the epoch aggregator on
    `reset_iteration`, and epoch values roll up into the run aggregator on
    `reset_epoch`.
    """

    def __init__(self, iteration_aggregator, epoch_aggregator, run_aggregator):
        self.run_aggregator = run_aggregator
        self.epoch_aggregator = epoch_aggregator
        self.iteration_aggregator = iteration_aggregator

    def record(self, val, n=1):
        self.iteration_aggregator.record(val, n=n)

    def get_iteration(self):
        value, _ = self.iteration_aggregator.get_val()
        return value

    def reset_iteration(self):
        # fold the finished iteration into the epoch-level aggregator
        value, count = self.iteration_aggregator.get_data()
        self.iteration_aggregator.reset()
        if value is not None:
            self.epoch_aggregator.record(value, n=count)

    def get_epoch(self):
        value, _ = self.epoch_aggregator.get_val()
        return value

    def reset_epoch(self):
        # fold the finished epoch into the run-level aggregator
        value, count = self.epoch_aggregator.get_data()
        self.epoch_aggregator.reset()
        if value is not None:
            self.run_aggregator.record(value, n=count)

    def get_run(self):
        value, _ = self.run_aggregator.get_val()
        return value

    def reset_run(self):
        self.run_aggregator.reset()
class QuantileMeter(object):
    """Aggregator reporting the q-th quantile of all recorded values.

    `record` accepts either a scalar (weighted by `n`) or a list of values.
    """

    def __init__(self, q):
        # q: quantile in [0, 1], e.g. 0.99 for p99 latency
        self.q = q
        self.reset()

    def reset(self):
        self.vals = []
        self.n = 0

    def record(self, val, n=1):
        if isinstance(val, list):
            self.vals += val
            self.n += len(val)
        else:
            self.vals += [val] * n
            self.n += n

    def get_val(self):
        """Return (quantile, count); (None, count) when nothing was recorded."""
        if not self.vals:
            return None, self.n
        # NumPy 1.22 renamed `interpolation` to `method` and NumPy 2.0 removed
        # the old keyword entirely; try the new spelling first and fall back
        # for old NumPy versions.
        try:
            quantile = np.quantile(self.vals, self.q, method="nearest")
        except TypeError:
            quantile = np.quantile(self.vals, self.q, interpolation="nearest")
        return quantile, self.n

    def get_data(self):
        """Return (raw recorded values, count) for roll-up into outer meters."""
        return self.vals, self.n
class MaxMeter(object):
    """Aggregator keeping the maximum recorded value.

    Note: `n` reflects only the most recent `record` call, matching the other
    aggregators' (value, count) protocol.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        self.max = None
        self.n = 0

    def record(self, val, n=1):
        self.max = val if self.max is None else max(self.max, val)
        self.n = n

    def get_val(self):
        return self.max, self.n

    def get_data(self):
        return self.max, self.n
class MinMeter(object):
    """Aggregator keeping the minimum recorded value.

    Note: `n` reflects only the most recent `record` call, matching the other
    aggregators' (value, count) protocol.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        self.min = None
        self.n = 0

    def record(self, val, n=1):
        if self.min is None:
            self.min = val
        else:
            # BUGFIX: the original used max() here, so the "minimum" could
            # never decrease after the first recorded value.
            self.min = min(self.min, val)
        self.n = n

    def get_val(self):
        return self.min, self.n

    def get_data(self):
        return self.min, self.n
class LastMeter(object):
    """Aggregator keeping only the most recently recorded value and count."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.last = None
        self.n = 0

    def record(self, val, n=1):
        self.last = val
        self.n = n

    def get_val(self):
        return self.last, self.n

    def get_data(self):
        return self.last, self.n
class AverageMeter(object):
    """Aggregator reporting the weighted mean of all recorded values."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.n = 0
        self.val = 0

    def record(self, val, n=1):
        # accumulate the weighted sum; the mean is computed on read
        self.n += n
        self.val += val * n

    def get_val(self):
        if self.n == 0:
            return None, 0
        return self.val / self.n, self.n

    def get_data(self):
        if self.n == 0:
            return None, 0
        return self.val / self.n, self.n
class Logger(object):
    """Training/validation/calibration logger built on top of dllogger.

    Tracks per-metric Meters, rolls them up at iteration/epoch/run
    boundaries, and emits dllogger records at `print_interval` granularity.
    """

    def __init__(self, print_interval, backends, start_epoch=-1, verbose=False):
        self.epoch = start_epoch
        self.iteration = -1
        self.val_iteration = -1
        self.calib_iteration = -1
        self.metrics = OrderedDict()
        self.backends = backends
        self.print_interval = print_interval
        self.verbose = verbose
        dllogger.init(backends)

    def log_parameter(self, data, verbosity=0):
        """Log a run parameter (hyperparameter, config value) once."""
        dllogger.log(step="PARAMETER", data=data, verbosity=verbosity)

    def register_metric(self, metric_name, meter, verbosity=0, metadata=None):
        """Register a named metric with its Meter and dllogger metadata.

        BUGFIX: the original signature used the mutable default `metadata={}`,
        which is shared across calls; use None as the sentinel instead.
        """
        if metadata is None:
            metadata = {}
        if self.verbose:
            print("Registering metric: {}".format(metric_name))
        self.metrics[metric_name] = {"meter": meter, "level": verbosity}
        dllogger.metadata(metric_name, metadata)

    def log_metric(self, metric_name, val, n=1):
        """Record a value for a previously registered metric."""
        self.metrics[metric_name]["meter"].record(val, n=n)

    def start_iteration(self, mode="train"):
        if mode == "val":
            self.val_iteration += 1
        elif mode == "train":
            self.iteration += 1
        elif mode == "calib":
            self.calib_iteration += 1

    def end_iteration(self, mode="train"):
        """Emit iteration-level records every `print_interval` iterations.

        Calibration iterations are always emitted.
        """
        if mode == "val":
            it = self.val_iteration
        elif mode == "train":
            it = self.iteration
        elif mode == "calib":
            it = self.calib_iteration
        if it % self.print_interval == 0 or mode == "calib":
            # only metrics whose names are prefixed with the current mode
            metrics = {n: m for n, m in self.metrics.items() if n.startswith(mode)}
            if mode == "train":
                step = (self.epoch, self.iteration)
            elif mode == "val":
                step = (self.epoch, self.iteration, self.val_iteration)
            elif mode == "calib":
                step = ("Calibration", self.calib_iteration)
            verbositys = {m["level"] for _, m in metrics.items()}
            # one dllogger record per verbosity level
            for ll in verbositys:
                llm = {n: m for n, m in metrics.items() if m["level"] == ll}
                dllogger.log(
                    step=step,
                    data={n: m["meter"].get_iteration() for n, m in llm.items()},
                    verbosity=ll,
                )
            for n, m in metrics.items():
                m["meter"].reset_iteration()
            dllogger.flush()

    def start_epoch(self):
        self.epoch += 1
        self.iteration = 0
        self.val_iteration = 0
        for n, m in self.metrics.items():
            if not n.startswith("calib"):
                m["meter"].reset_epoch()

    def end_epoch(self):
        """Roll up iteration data and emit epoch-level records."""
        for n, m in self.metrics.items():
            if not n.startswith("calib"):
                m["meter"].reset_iteration()
        verbositys = {m["level"] for _, m in self.metrics.items()}
        for ll in verbositys:
            llm = {n: m for n, m in self.metrics.items() if m["level"] == ll}
            dllogger.log(
                step=(self.epoch,),
                data={n: m["meter"].get_epoch() for n, m in llm.items()},
            )

    def start_calibration(self):
        self.calib_iteration = 0
        for n, m in self.metrics.items():
            if n.startswith("calib"):
                m["meter"].reset_epoch()

    def end_calibration(self):
        for n, m in self.metrics.items():
            if n.startswith("calib"):
                m["meter"].reset_iteration()

    def end(self):
        """Emit the end-of-run summary (empty step tuple) and flush."""
        for n, m in self.metrics.items():
            m["meter"].reset_epoch()
        verbositys = {m["level"] for _, m in self.metrics.items()}
        for ll in verbositys:
            llm = {n: m for n, m in self.metrics.items() if m["level"] == ll}
            dllogger.log(
                step=tuple(),
                data={n: m["meter"].get_run() for n, m in llm.items()},
            )
        for n, m in self.metrics.items():
            m["meter"].reset_epoch()
        dllogger.flush()

    def iteration_generator_wrapper(self, gen, mode="train"):
        """Wrap a batch generator with start/end iteration bookkeeping."""
        for g in gen:
            self.start_iteration(mode=mode)
            yield g
            self.end_iteration(mode=mode)

    def epoch_generator_wrapper(self, gen):
        """Wrap an epoch generator with start/end epoch bookkeeping."""
        for g in gen:
            self.start_epoch()
            yield g
            self.end_epoch()
class Metrics:
    """Base class routing keyword metric values to Logger metric names.

    Subclasses fill `self.map` with {shorthand: [full metric names]}; keys
    absent from the map are passed through unchanged.
    """

    ACC_METADATA = {"unit": "%", "format": ":.2f"}
    IPS_METADATA = {"unit": "images/s", "format": ":.2f"}
    TIME_METADATA = {"unit": "s", "format": ":.5f"}
    LOSS_METADATA = {"unit": None, "format": ":.5f"}
    LR_METADATA = {"unit": None, "format": ":.5f"}

    def __init__(self, logger):
        self.logger = logger
        self.map = {}

    def log(self, **kwargs):
        """Forward each value to every mapped metric; no-op without a logger.

        Tuple values are interpreted as (value, weight).
        """
        if self.logger is None:
            return
        for key, value in kwargs.items():
            for target in self.map.get(key, [key]):
                if isinstance(value, tuple):
                    self.logger.log_metric(target, value[0], value[1])
                else:
                    self.logger.log_metric(target, value)
class TrainingMetrics(Metrics):
    """Metrics routing/registration for the training loop (`train.*` names)."""

    def __init__(self, logger):
        super().__init__(logger)
        if self.logger is not None:
            self.map = {
                "loss": ["train.loss"],
                "compute_ips": ["train.compute_ips"],
                "total_ips": ["train.total_ips"],
                "data_time": ["train.data_time"],
                "compute_time": ["train.compute_time"],
                "lr": ["train.lr"],
                "grad_scale": ["train.grad_scale"],
            }
            default = dllogger.Verbosity.DEFAULT
            verbose = dllogger.Verbosity.VERBOSE
            logger.register_metric(
                "train.loss",
                LOSS_METER(),
                verbosity=default,
                metadata=Metrics.LOSS_METADATA,
            )
            logger.register_metric(
                "train.compute_ips",
                PERF_METER(),
                verbosity=default,
                metadata=Metrics.IPS_METADATA,
            )
            logger.register_metric(
                "train.total_ips",
                PERF_METER(),
                verbosity=default,
                metadata=Metrics.IPS_METADATA,
            )
            logger.register_metric(
                "train.data_time",
                PERF_METER(),
                verbosity=verbose,
                metadata=Metrics.TIME_METADATA,
            )
            logger.register_metric(
                "train.compute_time",
                PERF_METER(),
                verbosity=verbose,
                metadata=Metrics.TIME_METADATA,
            )
            logger.register_metric(
                "train.lr",
                LR_METER(),
                verbosity=default,
            )
            # grad_scale reuses LOSS_METADATA (unitless, 5 decimal places)
            logger.register_metric(
                "train.grad_scale",
                PERF_METER(),
                verbosity=default,
                metadata=Metrics.LOSS_METADATA,
            )
class ValidationMetrics(Metrics):
    """Metrics routing/registration for evaluation (`{prefix}.*` names).

    `compute_time` fans out to the mean latency plus p100/p99/p95 quantiles.
    """

    def __init__(self, logger, prefix, topk):
        super().__init__(logger)
        if self.logger is not None:
            self.map = {
                "loss": [f"{prefix}.loss"],
                "top1": [f"{prefix}.top1"],
                f"top{topk}": [f"{prefix}.top{topk}"],
                "compute_ips": [f"{prefix}.compute_ips"],
                "total_ips": [f"{prefix}.total_ips"],
                "data_time": [f"{prefix}.data_time"],
                "compute_time": [
                    f"{prefix}.compute_latency",
                    f"{prefix}.compute_latency_at100",
                    f"{prefix}.compute_latency_at99",
                    f"{prefix}.compute_latency_at95",
                ],
            }
            default = dllogger.Verbosity.DEFAULT
            verbose = dllogger.Verbosity.VERBOSE
            logger.register_metric(
                f"{prefix}.top1",
                ACC_METER(),
                verbosity=default,
                metadata=Metrics.ACC_METADATA,
            )
            logger.register_metric(
                f"{prefix}.top{topk}",
                ACC_METER(),
                verbosity=default,
                metadata=Metrics.ACC_METADATA,
            )
            logger.register_metric(
                f"{prefix}.loss",
                LOSS_METER(),
                verbosity=default,
                metadata=Metrics.LOSS_METADATA,
            )
            logger.register_metric(
                f"{prefix}.compute_ips",
                PERF_METER(),
                verbosity=default,
                metadata=Metrics.IPS_METADATA,
            )
            logger.register_metric(
                f"{prefix}.total_ips",
                PERF_METER(),
                verbosity=default,
                metadata=Metrics.IPS_METADATA,
            )
            logger.register_metric(
                f"{prefix}.data_time",
                PERF_METER(),
                verbosity=verbose,
                metadata=Metrics.TIME_METADATA,
            )
            logger.register_metric(
                f"{prefix}.compute_latency",
                PERF_METER(),
                verbosity=default,
                metadata=Metrics.TIME_METADATA,
            )
            logger.register_metric(
                f"{prefix}.compute_latency_at100",
                LAT_100(),
                verbosity=verbose,
                metadata=Metrics.TIME_METADATA,
            )
            logger.register_metric(
                f"{prefix}.compute_latency_at99",
                LAT_99(),
                verbosity=verbose,
                metadata=Metrics.TIME_METADATA,
            )
            logger.register_metric(
                f"{prefix}.compute_latency_at95",
                LAT_95(),
                verbosity=verbose,
                metadata=Metrics.TIME_METADATA,
            )
image_classification/mixup.py
0 → 100644
View file @
e129194a
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
torch
import
torch.nn
as
nn
import
numpy
as
np
def mixup(alpha, data, target):
    """Apply mixup augmentation to a batch.

    Draws a mixing coefficient c ~ Beta(alpha, alpha) and blends each sample
    (and its target) with a randomly permuted partner from the same batch.

    Args:
        alpha: Beta-distribution concentration parameter
        data: batch tensor of shape (batch, ...)
        target: per-sample target tensor (e.g. one-hot), shape (batch, ...)

    Returns:
        (mixed_data, mixed_target) tensors on the same device as the inputs.
    """
    with torch.no_grad():
        bs = data.size(0)
        c = np.random.beta(alpha, alpha)
        # BUGFIX/generalization: the original used torch.randperm(bs).cuda(),
        # which crashes for CPU tensors; drawing the permutation on the
        # input's device behaves identically for CUDA inputs.
        perm = torch.randperm(bs, device=data.device)

        md = c * data + (1 - c) * data[perm, :]
        mt = c * target + (1 - c) * target[perm, :]
        return md, mt
class MixUpWrapper(object):
    """Iterable adapter applying mixup to every (input, target) batch of a
    dataloader; len() is forwarded to the wrapped loader."""

    def __init__(self, alpha, dataloader):
        self.alpha = alpha
        self.dataloader = dataloader

    def mixup_loader(self, loader):
        for batch, labels in loader:
            yield mixup(self.alpha, batch, labels)

    def __iter__(self):
        return self.mixup_loader(self.dataloader)

    def __len__(self):
        return len(self.dataloader)
class NLLMultiLabelSmooth(nn.Module):
    """Label-smoothed NLL loss over soft/multi-label targets (e.g. mixup).

    In training mode expects `target` as per-class weights (one-hot or mixed);
    in eval mode falls back to standard cross entropy over class indices.
    """

    def __init__(self, smoothing=0.0):
        super(NLLMultiLabelSmooth, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing

    def forward(self, x, target):
        # eval mode: plain cross entropy (guard clause instead of if/else)
        if not self.training:
            return torch.nn.functional.cross_entropy(x, target)

        logits = x.float()
        weights = target.float()
        logprobs = torch.nn.functional.log_softmax(logits, dim=-1)

        nll_loss = (-logprobs * weights).sum(-1)
        smooth_loss = -logprobs.mean(dim=-1)
        combined = self.confidence * nll_loss + self.smoothing * smooth_loss
        return combined.mean()
image_classification/models/__init__.py
0 → 100644
View file @
e129194a
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.entrypoints
import
nvidia_convnets_processing_utils
,
nvidia_efficientnet
from
.resnet
import
resnet50
,
resnext101_32x4d
,
se_resnext101_32x4d
from
.efficientnet
import
(
efficientnet_b0
,
efficientnet_b4
,
efficientnet_widese_b0
,
efficientnet_widese_b4
,
efficientnet_quant_b0
,
efficientnet_quant_b4
,
)
image_classification/models/__pycache__/__init__.cpython-37.pyc
0 → 100644
View file @
e129194a
File added
image_classification/models/__pycache__/common.cpython-37.pyc
0 → 100644
View file @
e129194a
File added
image_classification/models/__pycache__/efficientnet.cpython-37.pyc
0 → 100644
View file @
e129194a
File added
image_classification/models/__pycache__/entrypoints.cpython-37.pyc
0 → 100644
View file @
e129194a
File added
image_classification/models/__pycache__/model.cpython-37.pyc
0 → 100644
View file @
e129194a
File added
image_classification/models/__pycache__/resnet.cpython-37.pyc
0 → 100644
View file @
e129194a
File added
image_classification/models/common.py
0 → 100644
View file @
e129194a
import
copy
from
collections
import
OrderedDict
from
dataclasses
import
dataclass
from
typing
import
Optional
import
torch
import
warnings
from
torch
import
nn
import
torch.nn.functional
as
F
try
:
from
pytorch_quantization
import
nn
as
quant_nn
except
ImportError
as
e
:
warnings
.
warn
(
"pytorch_quantization module not found, quantization will not be available"
)
quant_nn
=
None
# LayerBuilder {{{
class LayerBuilder(object):
    """Factory for convolution / batch-norm / activation layers.

    All layers produced through a single builder share one `Config`, so the
    whole network gets consistent initialization, activation choice and
    batch-norm hyperparameters.
    """

    @dataclass
    class Config:
        activation: str = "relu"       # key into the activation factory table
        conv_init: str = "fan_in"      # kaiming_normal_ mode for conv weights
        bn_momentum: Optional[float] = None  # None -> PyTorch default
        bn_epsilon: Optional[float] = None   # None -> PyTorch default

    def __init__(self, config: "LayerBuilder.Config"):
        self.config = config

    def conv(
        self,
        kernel_size,
        in_planes,
        out_planes,
        groups=1,
        stride=1,
        bn=False,
        zero_init_bn=False,
        act=False,
    ):
        """Build a bias-free "same"-padded conv, optionally wrapped with BN/activation.

        Returns the bare Conv2d when neither `bn` nor `act` is requested,
        otherwise an nn.Sequential named ("conv", ["bn"], ["act"]).
        """
        conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            groups=groups,
            stride=stride,
            padding=int((kernel_size - 1) / 2),  # "same" padding for odd kernels
            bias=False,
        )
        nn.init.kaiming_normal_(
            conv.weight, mode=self.config.conv_init, nonlinearity="relu"
        )

        stages = [("conv", conv)]
        if bn:
            stages.append(("bn", self.batchnorm(out_planes, zero_init_bn)))
        if act:
            stages.append(("act", self.activation()))
        # A single stage means neither bn nor act was requested.
        return nn.Sequential(OrderedDict(stages)) if len(stages) > 1 else conv

    def convDepSep(
        self, kernel_size, in_planes, out_planes, stride=1, bn=False, act=False
    ):
        """3x3 depthwise separable convolution with padding"""
        # groups == in_planes makes the convolution depthwise.
        return self.conv(
            kernel_size,
            in_planes,
            out_planes,
            groups=in_planes,
            stride=stride,
            bn=bn,
            act=act,
        )

    def conv3x3(self, in_planes, out_planes, stride=1, groups=1, bn=False, act=False):
        """3x3 convolution with padding"""
        return self.conv(
            3, in_planes, out_planes, groups=groups, stride=stride, bn=bn, act=act
        )

    def conv1x1(self, in_planes, out_planes, stride=1, groups=1, bn=False, act=False):
        """1x1 convolution with padding"""
        return self.conv(
            1, in_planes, out_planes, groups=groups, stride=stride, bn=bn, act=act
        )

    def conv7x7(self, in_planes, out_planes, stride=1, groups=1, bn=False, act=False):
        """7x7 convolution with padding"""
        return self.conv(
            7, in_planes, out_planes, groups=groups, stride=stride, bn=bn, act=act
        )

    def conv5x5(self, in_planes, out_planes, stride=1, groups=1, bn=False, act=False):
        """5x5 convolution with padding"""
        return self.conv(
            5, in_planes, out_planes, groups=groups, stride=stride, bn=bn, act=act
        )

    def batchnorm(self, planes, zero_init=False):
        """BatchNorm2d honoring configured momentum/eps; optional zero gamma init.

        Zero-initializing gamma makes a residual branch start as the identity.
        """
        overrides = {
            key: value
            for key, value in (
                ("momentum", self.config.bn_momentum),
                ("eps", self.config.bn_epsilon),
            )
            if value is not None
        }
        bn = nn.BatchNorm2d(planes, **overrides)
        nn.init.constant_(bn.weight, 0 if zero_init else 1)
        nn.init.constant_(bn.bias, 0)
        return bn

    def activation(self):
        """Instantiate a fresh activation module per the configured name."""
        factories = {
            "silu": lambda: nn.SiLU(inplace=True),
            "relu": lambda: nn.ReLU(inplace=True),
            "onnx-silu": ONNXSiLU,
        }
        return factories[self.config.activation]()
# LayerBuilder }}}
# LambdaLayer {{{
class LambdaLayer(nn.Module):
    """Adapter that lets an arbitrary callable live inside nn.Sequential."""

    def __init__(self, lmbd):
        super().__init__()
        # The wrapped callable; applied verbatim on every forward pass.
        self.lmbd = lmbd

    def forward(self, x):
        return self.lmbd(x)
# }}}
# SqueezeAndExcitation {{{
class SqueezeAndExcitation(nn.Module):
    """Squeeze-and-Excitation attention over channels (Linear formulation).

    Produces a per-channel gate in (0, 1) of shape (N, C, 1, 1); callers are
    expected to multiply it onto the input themselves.
    """

    def __init__(self, in_channels, squeeze, activation):
        super().__init__()
        self.squeeze = nn.Linear(in_channels, squeeze)
        self.expand = nn.Linear(squeeze, in_channels)
        self.activation = activation
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        return self._attention(x)

    def _attention(self, x):
        # Global average pool over the spatial dims, then the squeeze/expand MLP.
        pooled = torch.mean(x, [2, 3])
        gate = self.sigmoid(self.expand(self.activation(self.squeeze(pooled))))
        # Restore singleton H and W so the gate broadcasts over (N, C, H, W).
        return gate.unsqueeze(2).unsqueeze(3)
class SqueezeAndExcitationTRT(nn.Module):
    """Squeeze-and-Excitation expressed with 1x1 convs and AdaptiveAvgPool2d.

    Functionally equivalent to `SqueezeAndExcitation` but stays in NCHW
    throughout, which TensorRT handles better. The returned gate keeps the
    (N, C, 1, 1) shape produced by the pooling layer.
    """

    def __init__(self, in_channels, squeeze, activation):
        super().__init__()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.squeeze = nn.Conv2d(in_channels, squeeze, 1)
        self.expand = nn.Conv2d(squeeze, in_channels, 1)
        self.activation = activation
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        return self._attention(x)

    def _attention(self, x):
        pooled = self.pooling(x)
        return self.sigmoid(self.expand(self.activation(self.squeeze(pooled))))
# EMA {{{
class EMA:
    """Exponential moving average of a module's parameters and buffers.

    `module_ema` holds the running average; calling the EMA object with a live
    module folds that module's current state into the average in place.
    """

    def __init__(self, mu, module_ema):
        self.mu = mu                   # asymptotic decay rate
        self.module_ema = module_ema   # module holding the averaged state

    def __call__(self, module, step=None):
        # Warm-up schedule: early steps use a smaller decay so the average
        # tracks the model quickly before settling at `mu`.
        decay = self.mu if step is None else min(self.mu, (1.0 + step) / (10 + step))

        def strip_module(s: str) -> str:
            # Hook for stripping DataParallel-style "module." prefixes;
            # currently the identity mapping.
            return s

        shadow = self.module_ema.state_dict()
        with torch.no_grad():
            for name, value in module.state_dict().items():
                # BatchNorm bookkeeping counters are not meaningful to average.
                if name.endswith("num_batches_tracked"):
                    continue
                key = strip_module(name)
                shadow[key].mul_(decay)
                shadow[key].add_((1.0 - decay) * value)
# }}}
# ONNXSiLU {{{
# Since torch.nn.SiLU is not supported in ONNX,
# it is required to use this implementation in exported model (15-20% more GPU memory is needed)
class ONNXSiLU(nn.Module):
    """SiLU (x * sigmoid(x)) written with ONNX-exportable primitives."""

    def __init__(self, *args, **kwargs):
        # Extra arguments (e.g. `inplace=`) are accepted for drop-in
        # compatibility with nn.SiLU but intentionally ignored.
        super().__init__()

    def forward(self, x):
        return x * torch.sigmoid(x)
# }}}
class SequentialSqueezeAndExcitation(SqueezeAndExcitation):
    """SE block that also applies the gate: returns gate(x) * x.

    With quantized=True, TensorQuantizer nodes are inserted on both factors of
    the final multiply so QAT observes the operation; otherwise the quantizers
    are nn.Identity and the multiply is plain.
    """

    def __init__(self, in_channels, squeeze, activation, quantized=False):
        super().__init__(in_channels, squeeze, activation)
        self.quantized = quantized
        if not quantized:
            self.mul_a_quantizer = nn.Identity()
            self.mul_b_quantizer = nn.Identity()
        else:
            assert quant_nn is not None, "pytorch_quantization is not available"
            quant_desc = quant_nn.QuantConv2d.default_quant_desc_input
            self.mul_a_quantizer = quant_nn.TensorQuantizer(quant_desc)
            self.mul_b_quantizer = quant_nn.TensorQuantizer(quant_desc)

    def forward(self, x):
        attention = self._attention(x)
        if self.quantized:
            return self.mul_a_quantizer(attention) * self.mul_b_quantizer(x)
        return attention * x
class SequentialSqueezeAndExcitationTRT(SqueezeAndExcitationTRT):
    """TRT-friendly SE block that also applies the gate: returns gate(x) * x.

    Mirrors `SequentialSqueezeAndExcitation` but inherits the 1x1-conv /
    AdaptiveAvgPool2d formulation, which is what TensorRT and the quantized
    export path expect.
    """

    def __init__(self, in_channels, squeeze, activation, quantized=False):
        super().__init__(in_channels, squeeze, activation)
        self.quantized = quantized
        if not quantized:
            self.mul_a_quantizer = nn.Identity()
            self.mul_b_quantizer = nn.Identity()
        else:
            assert quant_nn is not None, "pytorch_quantization is not available"
            quant_desc = quant_nn.QuantConv2d.default_quant_desc_input
            self.mul_a_quantizer = quant_nn.TensorQuantizer(quant_desc)
            self.mul_b_quantizer = quant_nn.TensorQuantizer(quant_desc)

    def forward(self, x):
        attention = self._attention(x)
        if self.quantized:
            return self.mul_a_quantizer(attention) * self.mul_b_quantizer(x)
        return attention * x
class StochasticDepthResidual(nn.Module):
    """Residual addition with stochastic depth.

    During training the branch `x` is kept with probability `survival_prob`
    (dropout on a scalar mask also rescales by 1/survival_prob); in eval mode
    the op degenerates to a plain `residual + x`.
    """

    def __init__(self, survival_prob: float):
        super().__init__()
        self.survival_prob = survival_prob
        # Scalar all-ones buffer fed to dropout; non-persistent so it never
        # shows up in checkpoints.
        self.register_buffer("mask", torch.ones(()), persistent=False)

    def forward(self, residual: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
        if not self.training:
            return torch.add(residual, other=x)
        with torch.no_grad():
            keep = F.dropout(
                self.mask,
                p=1 - self.survival_prob,
                training=self.training,
                inplace=False,
            )
        # residual + keep * x in a single fused op.
        return torch.addcmul(residual, keep, x)
class Flatten(nn.Module):
    """Drop the two trailing singleton spatial dims: (N, C, 1, 1) -> (N, C)."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Two squeezes rather than reshape keeps this export-friendly.
        return x.squeeze(-1).squeeze(-1)
image_classification/models/efficientnet.py
0 → 100644
View file @
e129194a
import
argparse
import
random
import
math
import
warnings
from
typing
import
List
,
Any
,
Optional
from
collections
import
namedtuple
,
OrderedDict
from
dataclasses
import
dataclass
,
replace
import
torch
from
torch
import
nn
from
functools
import
partial
# Optional quantization support: when pytorch_quantization (or the sibling
# quantization helpers) cannot be imported, install no-op stand-ins so the
# rest of the module still works for non-quantized models.
try:
    from pytorch_quantization import nn as quant_nn
    from ..quantization import switch_on_quantization
except ImportError:
    warnings.warn(
        "pytorch_quantization module not found, quantization will not be available"
    )
    quant_nn = None

    import contextlib

    @contextlib.contextmanager
    def switch_on_quantization(do_quantization=False):
        # Fallback context manager: only legal with quantization disabled.
        assert not do_quantization, "quantization is not available"
        try:
            yield
        finally:
            pass
from
.common
import
(
SequentialSqueezeAndExcitation
,
SequentialSqueezeAndExcitationTRT
,
LayerBuilder
,
StochasticDepthResidual
,
Flatten
,
)
from
.model
import
(
Model
,
ModelParams
,
ModelArch
,
OptimizerParams
,
create_entrypoint
,
EntryPoint
,
)
# EffNetArch {{{
@dataclass
class EffNetArch(ModelArch):
    """Stage-wise description of one EfficientNet variant.

    The five per-stage lists (kernel, stride, num_repeat, expansion, channels)
    must all have the same length; each index describes one stage of MBConv
    blocks.
    """

    block: Any                     # block constructor (e.g. original_mbconv)
    stem_channels: int
    feature_channels: int
    kernel: List[int]
    stride: List[int]
    num_repeat: List[int]
    expansion: List[int]
    channels: List[int]
    default_image_size: int
    squeeze_excitation_ratio: float = 0.25

    def enumerate(self):
        """Iterate (stage_idx, (kernel, stride, repeats, expansion, channels))."""
        return enumerate(
            zip(self.kernel, self.stride, self.num_repeat, self.expansion, self.channels)
        )

    def num_layers(self):
        """Number of stages; asserts the per-stage lists agree in length."""
        per_stage = [self.kernel, self.stride, self.num_repeat, self.expansion, self.channels]
        assert len({len(spec) for spec in per_stage}) == 1
        return len(self.kernel)

    @staticmethod
    def _scale_width(width_coeff, divisor=8):
        """Return a function scaling channel counts, rounded to `divisor`."""

        def _sw(num_channels):
            num_channels *= width_coeff
            # Round half-up to a multiple of `divisor`, never below `divisor`.
            # Rounding should not go down by more than 10%
            rounded_num_channels = max(
                divisor, int(num_channels + divisor / 2) // divisor * divisor
            )
            if rounded_num_channels < 0.9 * num_channels:
                rounded_num_channels += divisor
            return rounded_num_channels

        return _sw

    @staticmethod
    def _scale_depth(depth_coeff):
        """Return a function scaling repeat counts (ceil)."""

        def _sd(num_repeat):
            return int(math.ceil(num_repeat * depth_coeff))

        return _sd

    def scale(self, wc, dc, dis, divisor=8) -> "EffNetArch":
        """Compound-scale this architecture (width `wc`, depth `dc`, image size `dis`)."""
        sw = EffNetArch._scale_width(wc, divisor=divisor)
        sd = EffNetArch._scale_depth(dc)
        return EffNetArch(
            block=self.block,
            stem_channels=sw(self.stem_channels),
            feature_channels=sw(self.feature_channels),
            kernel=self.kernel,
            stride=self.stride,
            num_repeat=list(map(sd, self.num_repeat)),
            expansion=self.expansion,
            channels=list(map(sw, self.channels)),
            default_image_size=dis,
            squeeze_excitation_ratio=self.squeeze_excitation_ratio,
        )
# }}}
# EffNetParams {{{
@dataclass
class EffNetParams(ModelParams):
    """Hyperparameters accepted by EfficientNet.__init__ (minus the arch)."""

    dropout: float
    num_classes: int = 1000
    activation: str = "silu"
    conv_init: str = "fan_in"
    bn_momentum: float = 1 - 0.99
    bn_epsilon: float = 1e-3
    survival_prob: float = 1
    quantized: bool = False
    trt: bool = False

    def parser(self, name):
        """Extend the base argparse parser with EfficientNet-specific flags."""
        cli = super().parser(name)
        cli.add_argument(
            "--num_classes",
            metavar="N",
            default=self.num_classes,
            type=int,
            help="number of classes",
        )
        cli.add_argument(
            "--conv_init",
            default=self.conv_init,
            choices=["fan_in", "fan_out"],
            type=str,
            help="initialization mode for convolutional layers, see https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_",
        )
        cli.add_argument(
            "--bn_momentum",
            default=self.bn_momentum,
            type=float,
            help="Batch Norm momentum",
        )
        cli.add_argument(
            "--bn_epsilon",
            default=self.bn_epsilon,
            type=float,
            help="Batch Norm epsilon",
        )
        cli.add_argument(
            "--survival_prob",
            default=self.survival_prob,
            type=float,
            help="Survival probability for stochastic depth",
        )
        cli.add_argument(
            "--dropout", default=self.dropout, type=float, help="Dropout drop prob"
        )
        # NOTE(review): argparse `type=bool` treats any non-empty string
        # (including "False") as True — confirm whether callers rely on this
        # before changing it.
        cli.add_argument("--trt", metavar="True|False", default=self.trt, type=bool)
        return cli
# }}}
class EfficientNet(nn.Module):
    """EfficientNet backbone: stem -> MBConv stages -> 1x1 feature conv -> classifier.

    Construction is driven by an `EffNetArch` description. `quantized=True`
    builds the network inside the quantization context so QAT modules are
    produced; `trt=True` selects the TensorRT-friendly squeeze-excitation
    formulation.
    """

    def __init__(
        self,
        arch: EffNetArch,
        dropout: float,
        num_classes: int = 1000,
        activation: str = "silu",
        conv_init: str = "fan_in",
        bn_momentum: float = 1 - 0.99,
        bn_epsilon: float = 1e-3,
        survival_prob: float = 1,
        quantized: bool = False,
        trt: bool = False,
    ):
        self.quantized = quantized
        # Everything must be built inside the context so quantized layer
        # classes are substituted during construction.
        with switch_on_quantization(self.quantized):
            super(EfficientNet, self).__init__()
            self.arch = arch
            self.num_layers = arch.num_layers()
            self.num_blocks = sum(arch.num_repeat)
            self.survival_prob = survival_prob
            self.builder = LayerBuilder(
                LayerBuilder.Config(
                    activation=activation,
                    conv_init=conv_init,
                    bn_momentum=bn_momentum,
                    bn_epsilon=bn_epsilon,
                )
            )

            self.stem = self._make_stem(arch.stem_channels)
            out_channels = arch.stem_channels

            plc = 0  # number of blocks built so far (drives stochastic depth)
            layers = []
            for i, (k, s, r, e, c) in arch.enumerate():
                layer, out_channels = self._make_layer(
                    block=arch.block,
                    kernel_size=k,
                    stride=s,
                    num_repeat=r,
                    expansion=e,
                    in_channels=out_channels,
                    out_channels=c,
                    squeeze_excitation_ratio=arch.squeeze_excitation_ratio,
                    prev_layer_count=plc,
                    trt=trt,
                )
                plc = plc + r
                layers.append(layer)
            self.layers = nn.Sequential(*layers)

            self.features = self._make_features(out_channels, arch.feature_channels)
            self.classifier = self._make_classifier(
                arch.feature_channels, num_classes, dropout
            )

    def forward(self, x):
        x = self.stem(x)
        x = self.layers(x)
        x = self.features(x)
        x = self.classifier(x)
        return x

    def extract_features(self, x, layers=None):
        """Run the network and return a dict of the requested intermediate outputs.

        `layers` is a list drawn from {"layer1"... "layerN", "features",
        "classifier"}; None means "all of them". Only the prefix of stages
        needed to serve the request is executed.
        """
        if layers is None:
            layers = [f"layer{i+1}" for i in range(self.num_layers)] + [
                "features",
                "classifier",
            ]

        # Stage i must run if anything at or after it was requested.
        run = [
            i
            for i in range(self.num_layers)
            if "classifier" in layers
            or "features" in layers
            or any([f"layer{j+1}" in layers for j in range(i, self.num_layers)])
        ]

        output = {}
        x = self.stem(x)
        for l in run:
            fn = self.layers[l]
            x = fn(x)
            if f"layer{l+1}" in layers:
                output[f"layer{l+1}"] = x

        if "features" in layers or "classifier" in layers:
            x = self.features(x)
            if "features" in layers:
                output["features"] = x

        if "classifier" in layers:
            output["classifier"] = self.classifier(x)

        return output

    # helper functions {{{
    def _make_stem(self, stem_width):
        """Initial 3x3/stride-2 conv + BN + activation on the RGB input."""
        return nn.Sequential(
            OrderedDict(
                [
                    ("conv", self.builder.conv3x3(3, stem_width, stride=2)),
                    ("bn", self.builder.batchnorm(stem_width)),
                    ("activation", self.builder.activation()),
                ]
            )
        )

    def _get_survival_prob(self, block_id):
        """Linearly decay survival probability with block depth."""
        drop_rate = 1.0 - self.survival_prob
        sp = 1.0 - drop_rate * float(block_id) / self.num_blocks
        return sp

    def _make_features(self, in_channels, num_features):
        """Final 1x1 conv + BN + activation producing the feature embedding."""
        return nn.Sequential(
            OrderedDict(
                [
                    ("conv", self.builder.conv1x1(in_channels, num_features)),
                    ("bn", self.builder.batchnorm(num_features)),
                    ("activation", self.builder.activation()),
                ]
            )
        )

    def _make_classifier(self, num_features, num_classes, dropout):
        """Global pool -> flatten -> dropout -> linear classification head."""
        return nn.Sequential(
            OrderedDict(
                [
                    ("pooling", nn.AdaptiveAvgPool2d(1)),
                    ("squeeze", Flatten()),
                    ("dropout", nn.Dropout(dropout)),
                    ("fc", nn.Linear(num_features, num_classes)),
                ]
            )
        )

    def _make_layer(
        self,
        block,
        kernel_size,
        stride,
        num_repeat,
        expansion,
        in_channels,
        out_channels,
        squeeze_excitation_ratio,
        prev_layer_count,
        trt,
    ):
        """Build one stage: the stride/channel-changing block plus its repeats."""
        layers = []

        idx = 0
        survival_prob = self._get_survival_prob(idx + prev_layer_count)
        blk = block(
            self.builder,
            kernel_size,
            in_channels,
            out_channels,
            expansion,
            stride,
            # FIX: previously read self.arch.squeeze_excitation_ratio here while
            # the repeats below used the parameter; use the parameter uniformly
            # (the only caller passes arch.squeeze_excitation_ratio, so this is
            # behavior-preserving).
            squeeze_excitation_ratio,
            # A block can only be residual (and thus stochastically dropped)
            # when its input and output shapes match.
            survival_prob if stride == 1 and in_channels == out_channels else 1.0,
            self.quantized,
            trt=trt,
        )
        layers.append((f"block{idx}", blk))

        for idx in range(1, num_repeat):
            survival_prob = self._get_survival_prob(idx + prev_layer_count)
            blk = block(
                self.builder,
                kernel_size,
                out_channels,
                out_channels,
                expansion,
                1,  # stride
                squeeze_excitation_ratio,
                survival_prob,
                self.quantized,
                trt=trt,
            )
            layers.append((f"block{idx}", blk))
        return nn.Sequential(OrderedDict(layers)), out_channels

    def ngc_checkpoint_remap(self, url=None, version=None):
        """Return a state-dict key remapping function for older NGC checkpoints.

        The version is parsed from the NGC URL path when not given explicitly;
        unknown versions get the identity remap.
        """
        if version is None:
            version = url.split("/")[8]

        def to_sequential_remap(s):
            # Old checkpoints used "layer<k>.*"; the current module registers
            # stages under "layers.<k-1>.*".
            splited = s.split(".")
            if splited[0].startswith("layer"):
                return ".".join(
                    ["layers." + str(int(splited[0][len("layer"):]) - 1)]
                    + splited[1:]
                )
            else:
                return s

        def no_remap(s):
            return s

        return {"20.12.0": to_sequential_remap, "21.03.0": to_sequential_remap}.get(
            version, no_remap
        )

    # }}}
# MBConvBlock {{{
class MBConvBlock(nn.Module):
    """Mobile inverted-bottleneck block: [expand 1x1] -> depthwise -> SE -> project 1x1.

    When stride == 1 and channel counts match, the block is residual and the
    branch output is added back to the input (optionally via stochastic depth
    and, for QAT, through a residual quantizer).
    """

    __constants__ = ["quantized"]

    def __init__(
        self,
        builder: LayerBuilder,
        depsep_kernel_size: int,
        in_channels: int,
        out_channels: int,
        expand_ratio: int,
        stride: int,
        squeeze_excitation_ratio: float,
        squeeze_hidden=False,
        survival_prob: float = 1.0,
        quantized: bool = False,
        trt: bool = False,
    ):
        super().__init__()
        self.quantized = quantized
        self.residual = stride == 1 and in_channels == out_channels
        hidden_dim = in_channels * expand_ratio
        # Squeeze width can be derived from either the expanded ("wide SE") or
        # the input channel count.
        squeeze_base = hidden_dim if squeeze_hidden else in_channels
        squeeze_dim = max(1, int(squeeze_base * squeeze_excitation_ratio))

        # The 1x1 expansion is skipped entirely when expand_ratio == 1.
        self.expand = (
            None
            if in_channels == hidden_dim
            else builder.conv1x1(in_channels, hidden_dim, bn=True, act=True)
        )
        self.depsep = builder.convDepSep(
            depsep_kernel_size, hidden_dim, hidden_dim, stride, bn=True, act=True
        )
        if trt or self.quantized:
            # Need TRT mode for quantized in order to automatically insert quantization before pooling
            self.se: nn.Module = SequentialSqueezeAndExcitationTRT(
                hidden_dim, squeeze_dim, builder.activation(), self.quantized
            )
        else:
            self.se: nn.Module = SequentialSqueezeAndExcitation(
                hidden_dim, squeeze_dim, builder.activation(), self.quantized
            )

        self.proj = builder.conv1x1(hidden_dim, out_channels, bn=True)

        if survival_prob == 1.0:
            self.residual_add = torch.add
        else:
            self.residual_add = StochasticDepthResidual(survival_prob=survival_prob)
        if self.quantized and self.residual:
            assert quant_nn is not None, "pytorch_quantization is not available"
            self.residual_quantizer = quant_nn.TensorQuantizer(
                quant_nn.QuantConv2d.default_quant_desc_input
            )  # TODO QuantConv2d ?!?
        else:
            self.residual_quantizer = nn.Identity()

    def _branch(self, x: torch.Tensor) -> torch.Tensor:
        # FIX(dedup): this expression previously appeared twice in forward().
        h = x if self.expand is None else self.expand(x)
        return self.proj(self.se(self.depsep(h)))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b = self._branch(x)
        if not self.residual:
            return b
        if self.quantized:
            x = self.residual_quantizer(x)
        return self.residual_add(x, b)
def original_mbconv(
    builder: LayerBuilder,
    depsep_kernel_size: int,
    in_channels: int,
    out_channels: int,
    expand_ratio: int,
    stride: int,
    squeeze_excitation_ratio: int,
    survival_prob: float,
    quantized: bool,
    trt: bool,
):
    """MBConv with the standard SE width (derived from the input channels)."""
    return MBConvBlock(
        builder,
        depsep_kernel_size,
        in_channels,
        out_channels,
        expand_ratio,
        stride,
        squeeze_excitation_ratio,
        squeeze_hidden=False,
        survival_prob=survival_prob,
        quantized=quantized,
        trt=trt,
    )
def widese_mbconv(
    builder: LayerBuilder,
    depsep_kernel_size: int,
    in_channels: int,
    out_channels: int,
    expand_ratio: int,
    stride: int,
    squeeze_excitation_ratio: int,
    survival_prob: float,
    quantized: bool,
    trt: bool,
):
    """MBConv with a wide SE (squeeze width derived from the expanded channels)."""
    return MBConvBlock(
        builder,
        depsep_kernel_size,
        in_channels,
        out_channels,
        expand_ratio,
        stride,
        squeeze_excitation_ratio,
        squeeze_hidden=True,
        survival_prob=survival_prob,
        quantized=quantized,
        trt=trt,
    )
# }}}
# EffNet configs {{{
# fmt: off
# Base (B0) stage layout; B1-B7 are derived via compound scaling below.
effnet_b0_layers = EffNetArch(
    block=original_mbconv,
    stem_channels=32,
    feature_channels=1280,
    kernel=[3, 3, 5, 3, 5, 5, 3],
    stride=[1, 2, 2, 2, 1, 2, 1],
    num_repeat=[1, 2, 2, 3, 3, 4, 1],
    expansion=[1, 6, 6, 6, 6, 6, 6],
    channels=[16, 24, 40, 80, 112, 192, 320],
    default_image_size=224,
)
# Compound-scaled variants: wc = width coeff, dc = depth coeff,
# dis = default image size.
effnet_b1_layers = effnet_b0_layers.scale(wc=1, dc=1.1, dis=240)
effnet_b2_layers = effnet_b0_layers.scale(wc=1.1, dc=1.2, dis=260)
effnet_b3_layers = effnet_b0_layers.scale(wc=1.2, dc=1.4, dis=300)
effnet_b4_layers = effnet_b0_layers.scale(wc=1.4, dc=1.8, dis=380)
effnet_b5_layers = effnet_b0_layers.scale(wc=1.6, dc=2.2, dis=456)
effnet_b6_layers = effnet_b0_layers.scale(wc=1.8, dc=2.6, dis=528)
effnet_b7_layers = effnet_b0_layers.scale(wc=2.0, dc=3.1, dis=600)

# Pretrained checkpoint locations on NVIDIA NGC (only these variants ship
# weights; the others can only be trained from scratch).
urls = {
    "efficientnet-b0": "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_b0_pyt_amp/versions/20.12.0/files/nvidia_efficientnet-b0_210412.pth",
    "efficientnet-b4": "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_b4_pyt_amp/versions/20.12.0/files/nvidia_efficientnet-b4_210412.pth",
    "efficientnet-widese-b0": "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_widese_b0_pyt_amp/versions/20.12.0/files/nvidia_efficientnet-widese-b0_210412.pth",
    "efficientnet-widese-b4": "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_widese_b4_pyt_amp/versions/20.12.0/files/nvidia_efficientnet-widese-b4_210412.pth",
    "efficientnet-quant-b0": "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_b0_pyt_qat_ckpt_fp32/versions/21.03.0/files/nvidia-efficientnet-quant-b0-130421.pth",
    "efficientnet-quant-b4": "https://api.ngc.nvidia.com/v2/models/nvidia/efficientnet_b4_pyt_qat_ckpt_fp32/versions/21.03.0/files/nvidia-efficientnet-quant-b4-130421.pth",
}
def _m(*args, **kwargs):
    # Shorthand: a Model description bound to the EfficientNet constructor.
    return Model(constructor=EfficientNet, *args, **kwargs)
# Registry of all published EfficientNet variants. Only entries with a
# `checkpoint_url` can be loaded pretrained.
architectures = {
    # Standard EfficientNets (original_mbconv blocks).
    "efficientnet-b0": _m(arch=effnet_b0_layers, params=EffNetParams(dropout=0.2), checkpoint_url=urls["efficientnet-b0"]),
    "efficientnet-b1": _m(arch=effnet_b1_layers, params=EffNetParams(dropout=0.2)),
    "efficientnet-b2": _m(arch=effnet_b2_layers, params=EffNetParams(dropout=0.3)),
    "efficientnet-b3": _m(arch=effnet_b3_layers, params=EffNetParams(dropout=0.3)),
    "efficientnet-b4": _m(arch=effnet_b4_layers, params=EffNetParams(dropout=0.4, survival_prob=0.8), checkpoint_url=urls["efficientnet-b4"]),
    "efficientnet-b5": _m(arch=effnet_b5_layers, params=EffNetParams(dropout=0.4)),
    "efficientnet-b6": _m(arch=effnet_b6_layers, params=EffNetParams(dropout=0.5)),
    "efficientnet-b7": _m(arch=effnet_b7_layers, params=EffNetParams(dropout=0.5)),
    # Wide-SE variants: same layout, SE width derived from expanded channels.
    "efficientnet-widese-b0": _m(arch=replace(effnet_b0_layers, block=widese_mbconv), params=EffNetParams(dropout=0.2), checkpoint_url=urls["efficientnet-widese-b0"]),
    "efficientnet-widese-b1": _m(arch=replace(effnet_b1_layers, block=widese_mbconv), params=EffNetParams(dropout=0.2)),
    "efficientnet-widese-b2": _m(arch=replace(effnet_b2_layers, block=widese_mbconv), params=EffNetParams(dropout=0.3)),
    "efficientnet-widese-b3": _m(arch=replace(effnet_b3_layers, block=widese_mbconv), params=EffNetParams(dropout=0.3)),
    "efficientnet-widese-b4": _m(arch=replace(effnet_b4_layers, block=widese_mbconv), params=EffNetParams(dropout=0.4, survival_prob=0.8), checkpoint_url=urls["efficientnet-widese-b4"]),
    "efficientnet-widese-b5": _m(arch=replace(effnet_b5_layers, block=widese_mbconv), params=EffNetParams(dropout=0.4)),
    "efficientnet-widese-b6": _m(arch=replace(effnet_b6_layers, block=widese_mbconv), params=EffNetParams(dropout=0.5)),
    "efficientnet-widese-b7": _m(arch=replace(effnet_b7_layers, block=widese_mbconv), params=EffNetParams(dropout=0.5)),
    # Quantization-aware-training variants.
    "efficientnet-quant-b0": _m(arch=effnet_b0_layers, params=EffNetParams(dropout=0.2, quantized=True), checkpoint_url=urls["efficientnet-quant-b0"]),
    "efficientnet-quant-b1": _m(arch=effnet_b1_layers, params=EffNetParams(dropout=0.2, quantized=True)),
    "efficientnet-quant-b2": _m(arch=effnet_b2_layers, params=EffNetParams(dropout=0.3, quantized=True)),
    "efficientnet-quant-b3": _m(arch=effnet_b3_layers, params=EffNetParams(dropout=0.3, quantized=True)),
    "efficientnet-quant-b4": _m(arch=effnet_b4_layers, params=EffNetParams(dropout=0.4, survival_prob=0.8, quantized=True), checkpoint_url=urls["efficientnet-quant-b4"]),
    "efficientnet-quant-b5": _m(arch=effnet_b5_layers, params=EffNetParams(dropout=0.4, quantized=True)),
    "efficientnet-quant-b6": _m(arch=effnet_b6_layers, params=EffNetParams(dropout=0.5, quantized=True)),
    "efficientnet-quant-b7": _m(arch=effnet_b7_layers, params=EffNetParams(dropout=0.5, quantized=True)),
}
# fmt: on
# }}}
# Torchhub entrypoints: one callable per published architecture name.
_ce = lambda n: EntryPoint.create(n, architectures[n])

efficientnet_b0 = _ce("efficientnet-b0")
efficientnet_b4 = _ce("efficientnet-b4")
efficientnet_widese_b0 = _ce("efficientnet-widese-b0")
efficientnet_widese_b4 = _ce("efficientnet-widese-b4")
efficientnet_quant_b0 = _ce("efficientnet-quant-b0")
efficientnet_quant_b4 = _ce("efficientnet-quant-b4")
image_classification/models/entrypoints.py
0 → 100644
View file @
e129194a
# Copyright (c) 2018-2019, NVIDIA CORPORATION
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
def nvidia_efficientnet(type="efficientnet-b0", pretrained=True, **kwargs):
    """Constructs a EfficientNet model.

    For detailed information on model input and output, training recipies, inference and performance
    visit: github.com/NVIDIA/DeepLearningExamples and/or ngc.nvidia.com

    Args:
        type (str, "efficientnet-b0"): architecture name; must be a key of
            `efficientnet.architectures` (e.g. "efficientnet-b4",
            "efficientnet-widese-b0", "efficientnet-quant-b0").
        pretrained (bool, True): If True, returns a model pretrained on IMAGENET dataset.
    """
    # BUGFIX: default was "efficient-b0", which is not a key of
    # `architectures` and made the no-argument call raise KeyError.
    from .efficientnet import _ce

    return _ce(type)(pretrained=pretrained, **kwargs)
def nvidia_convnets_processing_utils():
    """Return a helper object with static pre/post-processing utilities.

    Third-party imports (PIL, torchvision, requests, validators) are done
    lazily inside this factory so importing the package does not require them.
    """
    # FIX: removed duplicate `import numpy as np` and redundant re-imports
    # inside the nested helpers.
    import numpy as np
    import torch
    import json
    import requests
    import validators
    from PIL import Image
    import torchvision.transforms as transforms

    class Processing:
        @staticmethod
        def prepare_input_from_uri(uri, cuda=False):
            """Load an image from a URL or local path and return a normalized
            (1, 3, 224, 224) float tensor (ImageNet mean/std)."""
            img_transforms = transforms.Compose(
                [transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor()]
            )
            if validators.url(uri):
                img = Image.open(requests.get(uri, stream=True).raw)
            else:
                img = Image.open(uri)
            img = img_transforms(img)
            with torch.no_grad():
                # mean and std are not multiplied by 255 as they are in training script
                # torch dataloader reads data into bytes whereas loading directly
                # through PIL creates a tensor with floats in [0,1] range
                mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
                std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
                img = img.float()
                if cuda:
                    mean = mean.cuda()
                    std = std.cuda()
                    img = img.cuda()
                input = img.unsqueeze(0).sub_(mean).div_(std)
            return input

        @staticmethod
        def pick_n_best(predictions, n=5):
            """Print and return the top-n (class, probability) pairs per sample."""
            predictions = predictions.float().cpu().numpy()
            topN = np.argsort(-1 * predictions, axis=-1)[:, :n]
            imgnet_classes = Processing.get_imgnet_classes()
            results = []
            for idx, case in enumerate(topN):
                r = []
                for c, v in zip(imgnet_classes[case], predictions[idx, case]):
                    r.append((f"{c}", f"{100*v:.1f}%"))
                print(f"sample {idx}: {r}")
                results.append(r)
            return results

        @staticmethod
        def get_imgnet_classes():
            """Load (downloading once if needed) the ImageNet synset names."""
            import os

            imgnet_classes_json = "LOC_synset_mapping.json"
            if not os.path.exists(imgnet_classes_json):
                print("Downloading Imagenet Classes names.")
                # BUGFIX: `import urllib` alone does not guarantee that the
                # `urllib.request` submodule is loaded; import it explicitly.
                import urllib.request

                urllib.request.urlretrieve(
                    "https://raw.githubusercontent.com/NVIDIA/DeepLearningExamples/master/PyTorch/Classification/ConvNets/LOC_synset_mapping.json",
                    filename=imgnet_classes_json,
                )
                print("Downloading finished.")
            # BUGFIX: close the file handle instead of leaking it.
            with open(imgnet_classes_json, "r") as f:
                imgnet_classes = np.array(json.load(f))
            return imgnet_classes

    return Processing()
image_classification/models/model.py
0 → 100644
View file @
e129194a
from
dataclasses
import
dataclass
,
asdict
,
replace
from
.common
import
(
SequentialSqueezeAndExcitationTRT
,
SequentialSqueezeAndExcitation
,
SqueezeAndExcitation
,
SqueezeAndExcitationTRT
,
)
from
typing
import
Optional
,
Callable
import
os
import
torch
import
argparse
from
functools
import
partial
@dataclass
class ModelArch:
    """Marker base class for architecture descriptions (subclasses add fields)."""
    pass
@dataclass
class ModelParams:
    """Base class for per-model hyperparameter sets."""

    def parser(self, name):
        """Return an argparse parser for this model's options (no -h, composable)."""
        return argparse.ArgumentParser(
            description=f"{name} arguments", add_help=False, usage=""
        )
@dataclass
class OptimizerParams:
    """Marker base class for optimizer hyperparameter sets."""
    pass
@dataclass
class Model:
    """Everything needed to build (and optionally load weights for) one network."""

    constructor: Callable                       # builds the nn.Module, called with arch= and params
    arch: ModelArch                             # architecture description
    params: Optional[ModelParams]               # default hyperparameters
    optimizer_params: Optional[OptimizerParams] = None
    checkpoint_url: Optional[str] = None        # pretrained weights location (NGC)
def torchhub_docstring(name: str):
    # Standard torchhub docstring attached to each generated entrypoint.
    return f"""Constructs a {name} model.

    For detailed information on model input and output, training recipies, inference and performance
    visit: github.com/NVIDIA/DeepLearningExamples and/or ngc.nvidia.com

    Args:
        pretrained (bool, True): If True, returns a model pretrained on IMAGENET dataset.
    """
class EntryPoint:
    """Callable torchhub-style entrypoint for a registered :class:`Model`.

    Calling an instance constructs the nn.Module and optionally loads
    pretrained weights (from NGC or a local file), normalising checkpoint
    key names and squeeze-and-excitation weight shapes along the way.
    """

    @staticmethod
    def create(name: str, model: Model):
        """Build an EntryPoint and attach the generated torchhub docstring."""
        ep = EntryPoint(name, model)
        ep.__doc__ = torchhub_docstring(name)
        return ep

    def __init__(self, name: str, model: Model):
        self.name = name
        self.model = model

    def __call__(
        self,
        pretrained=True,
        pretrained_from_file=None,
        state_dict_key_map_fn=None,
        **kwargs,
    ):
        """Construct the model and (optionally) load weights into it.

        Args:
            pretrained: load the NGC checkpoint (mutually exclusive with
                ``pretrained_from_file``).
            pretrained_from_file: path to a local checkpoint file.
            state_dict_key_map_fn: optional key-renaming function applied to
                every checkpoint key.
            **kwargs: overrides for the model's default params.
        """
        # The two weight sources are mutually exclusive.
        assert not (pretrained and (pretrained_from_file is not None))
        params = replace(self.model.params, **kwargs)
        model = self.model.constructor(arch=self.model.arch, **asdict(params))
        state_dict = None
        if pretrained:
            assert self.model.checkpoint_url is not None
            state_dict = torch.hub.load_state_dict_from_url(
                self.model.checkpoint_url,
                map_location=torch.device("cpu"),
                progress=True,
            )

        if pretrained_from_file is not None:
            if os.path.isfile(pretrained_from_file):
                print(
                    "=> loading pretrained weights from '{}'".format(
                        pretrained_from_file
                    )
                )
                state_dict = torch.load(
                    pretrained_from_file, map_location=torch.device("cpu")
                )
            else:
                # Best-effort: missing file is reported but not fatal.
                print(
                    "=> no pretrained weights found at '{}'".format(
                        pretrained_from_file
                    )
                )

        if state_dict is not None:
            # Strip DistributedDataParallel's "module." prefix if present.
            state_dict = {
                k[len("module.") :] if k.startswith("module.") else k: v
                for k, v in state_dict.items()
            }

            def reshape(t, conv):
                # Normalise SE weights between the linear layout (2-D) and
                # the 1x1-conv layout (4-D) depending on the target module.
                if conv:
                    if len(t.shape) == 4:
                        return t
                    else:
                        return t.view(t.shape[0], -1, 1, 1)
                else:
                    if len(t.shape) == 4:
                        return t.view(t.shape[0], t.shape[1])
                    else:
                        return t

            if state_dict_key_map_fn is not None:
                state_dict = {
                    state_dict_key_map_fn(k): v for k, v in state_dict.items()
                }

            if pretrained and hasattr(model, "ngc_checkpoint_remap"):
                # Version-specific key remapping declared by the model itself.
                remap_fn = model.ngc_checkpoint_remap(url=self.model.checkpoint_url)
                state_dict = {remap_fn(k): v for k, v in state_dict.items()}

            def _se_layer_uses_conv(m):
                # TRT-flavoured SE blocks implement squeeze/expand as convs.
                return any(
                    map(
                        partial(isinstance, m),
                        [
                            SqueezeAndExcitationTRT,
                            SequentialSqueezeAndExcitationTRT,
                        ],
                    )
                )

            # Reshape only SE squeeze/expand weights; ".".join(k.split(".")[:-2])
            # names the SE module that owns the parameter.
            state_dict = {
                k: reshape(
                    v,
                    conv=_se_layer_uses_conv(
                        dict(model.named_modules())[".".join(k.split(".")[:-2])]
                    ),
                )
                if is_se_weight(k, v)
                else v
                for k, v in state_dict.items()
            }

            model.load_state_dict(state_dict)
        return model

    def parser(self):
        """Return a CLI parser for this model, or None if it has no params."""
        if self.model.params is None:
            return None
        parser = self.model.params.parser(self.name)
        parser.add_argument(
            "--pretrained-from-file",
            default=None,
            type=str,
            metavar="PATH",
            help="load weights from local file",
        )
        # --pretrained only makes sense when a checkpoint URL is registered.
        if self.model.checkpoint_url is not None:
            parser.add_argument(
                "--pretrained",
                default=False,
                action="store_true",
                help="load pretrained weights from NGC",
            )
        return parser
def is_se_weight(key, value):
    """Return True when `key` names a squeeze-and-excitation parameter.

    `value` is unused; it is accepted so the function can be used directly
    over state_dict items.
    """
    return key.endswith(("squeeze.weight", "expand.weight"))
def create_entrypoint(m: Model):
    """Build a plain entrypoint closure for model description `m`.

    The returned callable accepts keyword overrides for the model's default
    params and constructs the module (no checkpoint loading).
    """

    def _ep(**kwargs):
        merged = replace(m.params, **kwargs)
        return m.constructor(arch=m.arch, **asdict(merged))

    return _ep
image_classification/models/resnet.py
0 → 100644
View file @
e129194a
# Copyright (c) 2018-2019, NVIDIA CORPORATION
# Copyright (c) 2017- Facebook, Inc
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import
argparse
from
collections
import
OrderedDict
from
dataclasses
import
dataclass
from
typing
import
List
,
Dict
,
Callable
,
Any
,
Type
import
torch
import
torch.nn
as
nn
from
.common
import
(
SqueezeAndExcitation
,
LayerBuilder
,
SqueezeAndExcitationTRT
,
)
from
.model
import
(
Model
,
ModelParams
,
ModelArch
,
EntryPoint
,
)
# NOTE(review): "resnet_configs" is not defined anywhere in this module, so a
# star-import would raise AttributeError on it — confirm against upstream.
__all__ = ["ResNet", "resnet_configs"]
# BasicBlock {{{
class BasicBlock(nn.Module):
    """Two-conv residual block (ResNet-18/34 style).

    conv3x3 -> BN -> ReLU -> conv3x3 -> BN, plus identity (or `downsample`)
    shortcut, followed by a final ReLU.
    """

    def __init__(
        self,
        builder,
        inplanes,
        planes,
        expansion,
        stride=1,
        cardinality=1,
        downsample=None,
        fused_se=True,      # accepted for signature parity with Bottleneck; unused here
        last_bn_0_init=False,
        trt=False,          # accepted for signature parity with Bottleneck; unused here
    ):
        super(BasicBlock, self).__init__()
        self.conv1 = builder.conv3x3(inplanes, planes, stride, groups=cardinality)
        self.bn1 = builder.batchnorm(planes)
        self.relu = builder.activation()
        self.conv2 = builder.conv3x3(planes, planes * expansion, groups=cardinality)
        # zero-initialising the last BN's gamma makes the block start as identity
        self.bn2 = builder.batchnorm(planes * expansion, zero_init=last_bn_0_init)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        # builder.batchnorm may return None (BN disabled), hence the guards
        if self.bn1 is not None:
            out = self.bn1(out)

        out = self.relu(out)

        out = self.conv2(out)

        if self.bn2 is not None:
            out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
# BasicBlock }}}
# Bottleneck {{{
class Bottleneck(nn.Module):
    """Three-conv bottleneck residual block (ResNet-50 style), with optional
    squeeze-and-excitation.

    conv1x1 -> BN -> ReLU -> conv3x3 -> BN -> ReLU -> conv1x1 -> BN, plus
    identity (or `downsample`) shortcut scaled by the SE gate when enabled,
    followed by a final ReLU.
    """

    def __init__(
        self,
        builder,
        inplanes,
        planes,
        expansion,
        stride=1,
        cardinality=1,
        se=False,           # enable squeeze-and-excitation
        se_squeeze=16,      # SE channel-reduction ratio
        downsample=None,
        fused_se=True,      # use fused addcmul for residual+SE combine
        last_bn_0_init=False,
        trt=False,          # use the TRT-friendly SE implementation
    ):
        super(Bottleneck, self).__init__()
        self.conv1 = builder.conv1x1(inplanes, planes)
        self.bn1 = builder.batchnorm(planes)
        self.conv2 = builder.conv3x3(planes, planes, groups=cardinality, stride=stride)
        self.bn2 = builder.batchnorm(planes)
        self.conv3 = builder.conv1x1(planes, planes * expansion)
        # zero-initialising the last BN's gamma makes the block start as identity
        self.bn3 = builder.batchnorm(planes * expansion, zero_init=last_bn_0_init)
        self.relu = builder.activation()
        self.downsample = downsample
        self.stride = stride
        self.fused_se = fused_se

        if se:
            self.squeeze = (
                SqueezeAndExcitation(
                    planes * expansion, se_squeeze, builder.activation()
                )
                if not trt
                else SqueezeAndExcitationTRT(
                    planes * expansion, se_squeeze, builder.activation()
                )
            )
        else:
            self.squeeze = None

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        if self.squeeze is None:
            out += residual
        else:
            if self.fused_se:
                # residual + out * squeeze(out) in one fused kernel
                out = torch.addcmul(residual, out, self.squeeze(out), value=1)
            else:
                out = residual + out * self.squeeze(out)

        out = self.relu(out)

        return out
class SEBottleneck(Bottleneck):
    """Bottleneck with squeeze-and-excitation always enabled (ratio 16)."""

    def __init__(
        self,
        builder,
        inplanes,
        planes,
        expansion,
        stride=1,
        cardinality=1,
        downsample=None,
        fused_se=True,
        last_bn_0_init=False,
        trt=False,
    ):
        # Delegate everything to Bottleneck, forcing the SE branch on.
        super().__init__(
            builder,
            inplanes,
            planes,
            expansion,
            stride=stride,
            cardinality=cardinality,
            se=True,
            se_squeeze=16,
            downsample=downsample,
            fused_se=fused_se,
            last_bn_0_init=last_bn_0_init,
            trt=trt,
        )
# Bottleneck }}}
class ResNet(nn.Module):
    """Configurable ResNet/ResNeXt backbone built from an :class:`Arch`
    description and a :class:`LayerBuilder`.
    """

    @dataclass
    class Arch(ModelArch):
        """Topology description consumed by ResNet.__init__."""

        block: Type[Bottleneck]     # residual block class
        layers: List[int]  # arch   # number of blocks per stage
        widths: List[int]  # arch   # base width per stage
        expansion: int              # block output-channel multiplier
        cardinality: int = 1        # grouped-conv cardinality (ResNeXt)
        stem_width: int = 64
        activation: str = "relu"
        default_image_size: int = 224

    @dataclass
    class Params(ModelParams):
        """Training/instantiation hyper-parameters with CLI integration."""

        num_classes: int = 1000
        last_bn_0_init: bool = False
        conv_init: str = "fan_in"
        trt: bool = False
        fused_se: bool = True

        def parser(self, name):
            """Extend the base parser with ResNet-specific flags."""
            p = super().parser(name)
            p.add_argument(
                "--num_classes",
                metavar="N",
                default=self.num_classes,
                type=int,
                help="number of classes",
            )
            # NOTE(review): type=bool on argparse flags treats any non-empty
            # string (including "False") as True — confirm intended usage.
            p.add_argument(
                "--last_bn_0_init",
                metavar="True|False",
                default=self.last_bn_0_init,
                type=bool,
            )
            p.add_argument(
                "--conv_init",
                default=self.conv_init,
                choices=["fan_in", "fan_out"],
                type=str,
                help="initialization mode for convolutional layers, see https://pytorch.org/docs/stable/nn.init.html#torch.nn.init.kaiming_normal_",
            )
            p.add_argument(
                "--trt", metavar="True|False", default=self.trt, type=bool
            )
            p.add_argument(
                "--fused_se", metavar="True|False", default=self.fused_se, type=bool
            )
            return p

    def __init__(
        self,
        arch: Arch,
        num_classes: int = 1000,
        last_bn_0_init: bool = False,
        conv_init: str = "fan_in",
        trt: bool = False,
        fused_se: bool = True,
    ):
        super(ResNet, self).__init__()
        self.arch = arch
        self.builder = LayerBuilder(
            LayerBuilder.Config(activation=arch.activation, conv_init=conv_init)
        )
        self.last_bn_0_init = last_bn_0_init
        # Stem: 7x7 stride-2 conv + BN + activation + 3x3 stride-2 max-pool.
        self.conv1 = self.builder.conv7x7(3, arch.stem_width, stride=2)
        self.bn1 = self.builder.batchnorm(arch.stem_width)
        self.relu = self.builder.activation()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        inplanes = arch.stem_width
        assert len(arch.widths) == len(arch.layers)
        self.num_layers = len(arch.widths)
        layers = []
        for i, (w, l) in enumerate(zip(arch.widths, arch.layers)):
            # First stage keeps stride 1 (the stem already downsampled twice).
            layer, inplanes = self._make_layer(
                arch.block,
                arch.expansion,
                inplanes,
                w,
                l,
                cardinality=arch.cardinality,
                stride=1 if i == 0 else 2,
                trt=trt,
                fused_se=fused_se,
            )
            layers.append(layer)
        self.layers = nn.Sequential(*layers)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(arch.widths[-1] * arch.expansion, num_classes)

    def stem(self, x):
        """Run the input stem (conv + optional BN + activation + max-pool)."""
        x = self.conv1(x)
        if self.bn1 is not None:
            x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        return x

    def classifier(self, x):
        """Global-average-pool, flatten, and apply the final linear layer."""
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

    def forward(self, x):
        x = self.stem(x)
        x = self.layers(x)
        x = self.classifier(x)
        return x

    def extract_features(self, x, layers=None):
        """Return a dict of intermediate activations.

        Args:
            x: input batch.
            layers: names among "layer1".."layerN" and "classifier"; defaults
                to all of them.
        """
        if layers is None:
            layers = [f"layer{i+1}" for i in range(self.num_layers)] + [
                "classifier"
            ]

        # Run stage i iff some requested output lies at or beyond stage i.
        run = [
            i
            for i in range(self.num_layers)
            if "classifier" in layers
            or any([f"layer{j+1}" in layers for j in range(i, self.num_layers)])
        ]

        output = {}
        x = self.stem(x)
        for l in run:
            fn = self.layers[l]
            x = fn(x)
            if f"layer{l+1}" in layers:
                output[f"layer{l+1}"] = x

        if "classifier" in layers:
            output["classifier"] = self.classifier(x)

        return output

    # helper functions {{{
    def _make_layer(
        self,
        block,
        expansion,
        inplanes,
        planes,
        blocks,
        stride=1,
        cardinality=1,
        trt=False,
        fused_se=True,
    ):
        """Build one stage of `blocks` residual blocks.

        Returns (nn.Sequential stage, output channel count).
        """
        downsample = None
        # Projection shortcut when the spatial size or channel count changes.
        if stride != 1 or inplanes != planes * expansion:
            dconv = self.builder.conv1x1(inplanes, planes * expansion, stride=stride)
            dbn = self.builder.batchnorm(planes * expansion)
            if dbn is not None:
                downsample = nn.Sequential(dconv, dbn)
            else:
                downsample = dconv

        layers = []
        for i in range(blocks):
            layers.append(
                block(
                    self.builder,
                    inplanes,
                    planes,
                    expansion,
                    # Only the first block downsamples / projects.
                    stride=stride if i == 0 else 1,
                    cardinality=cardinality,
                    downsample=downsample if i == 0 else None,
                    fused_se=fused_se,
                    last_bn_0_init=self.last_bn_0_init,
                    trt=trt,
                )
            )
            inplanes = planes * expansion

        return nn.Sequential(*layers), inplanes

    def ngc_checkpoint_remap(self, url=None, version=None):
        """Return a key-remap function for an NGC checkpoint version.

        The version is parsed from the URL path when not given explicitly.
        Version 20.06.0 checkpoints use "layerK.*" keys which are mapped to
        this module's "layers.(K-1).*" layout; other versions need no remap.
        """
        if version is None:
            # assumes the NGC URL layout puts the version at path index 8 —
            # TODO confirm against the registered checkpoint_url format
            version = url.split("/")[8]

        def to_sequential_remap(s):
            splited = s.split(".")
            if splited[0].startswith("layer"):
                return ".".join(
                    ["layers." + str(int(splited[0][len("layer") :]) - 1)]
                    + splited[1:]
                )
            else:
                return s

        def no_remap(s):
            return s

        return {"20.06.0": to_sequential_remap}.get(version, no_remap)
# }}}
# Registry of model descriptions keyed by their public (torchhub) names.
__models: Dict[str, Model] = {
    "resnet50": Model(
        constructor=ResNet,
        arch=ResNet.Arch(
            stem_width=64,
            block=Bottleneck,
            layers=[3, 4, 6, 3],
            widths=[64, 128, 256, 512],
            expansion=4,
            default_image_size=224,
        ),
        params=ResNet.Params(),
        checkpoint_url="https://api.ngc.nvidia.com/v2/models/nvidia/resnet50_pyt_amp/versions/20.06.0/files/nvidia_resnet50_200821.pth.tar",
    ),
    "resnext101-32x4d": Model(
        constructor=ResNet,
        arch=ResNet.Arch(
            stem_width=64,
            block=Bottleneck,
            layers=[3, 4, 23, 3],
            widths=[128, 256, 512, 1024],
            expansion=2,
            cardinality=32,
            default_image_size=224,
        ),
        params=ResNet.Params(),
        checkpoint_url="https://api.ngc.nvidia.com/v2/models/nvidia/resnext101_32x4d_pyt_amp/versions/20.06.0/files/nvidia_resnext101-32x4d_200821.pth.tar",
    ),
    "se-resnext101-32x4d": Model(
        constructor=ResNet,
        arch=ResNet.Arch(
            stem_width=64,
            block=SEBottleneck,
            layers=[3, 4, 23, 3],
            widths=[128, 256, 512, 1024],
            expansion=2,
            cardinality=32,
            default_image_size=224,
        ),
        params=ResNet.Params(),
        checkpoint_url="https://api.ngc.nvidia.com/v2/models/nvidia/seresnext101_32x4d_pyt_amp/versions/20.06.0/files/nvidia_se-resnext101-32x4d_200821.pth.tar",
    ),
}

# Helper: build a documented EntryPoint for a registered name.
_ce = lambda n: EntryPoint.create(n, __models[n])

# Public torchhub-style entrypoints.
resnet50 = _ce("resnet50")
resnext101_32x4d = _ce("resnext101-32x4d")
se_resnext101_32x4d = _ce("se-resnext101-32x4d")
image_classification/optimizers.py
0 → 100644
View file @
e129194a
import
math
import
numpy
as
np
import
torch
from
torch
import
optim
def get_optimizer(parameters, lr, args, state=None):
    """Build the optimizer selected by ``args.optimizer``.

    Args:
        parameters: iterable of (name, parameter) pairs.
        lr: initial learning rate.
        args: namespace providing optimizer hyper-parameters (momentum,
            weight_decay, nesterov, rmsprop_alpha, rmsprop_eps,
            bn_weight_decay, optimizer).
        state: optional optimizer state dict to restore.

    Returns:
        torch.optim.Optimizer

    Raises:
        ValueError: if ``args.optimizer`` is not "sgd" or "rmsprop".
    """
    if args.optimizer == "sgd":
        optimizer = get_sgd_optimizer(
            parameters,
            lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            nesterov=args.nesterov,
            bn_weight_decay=args.bn_weight_decay,
        )
    elif args.optimizer == "rmsprop":
        optimizer = get_rmsprop_optimizer(
            parameters,
            lr,
            alpha=args.rmsprop_alpha,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
            eps=args.rmsprop_eps,
            bn_weight_decay=args.bn_weight_decay,
        )
    else:
        # Bug fix: previously an unknown name fell through and raised an
        # opaque UnboundLocalError on the line below.
        raise ValueError(f"Unsupported optimizer: {args.optimizer!r}")

    if state is not None:
        optimizer.load_state_dict(state)

    return optimizer
def get_sgd_optimizer(
    parameters, lr, momentum, weight_decay, nesterov=False, bn_weight_decay=False
):
    """Build an SGD optimizer, optionally exempting BN parameters from
    weight decay.

    Args:
        parameters: iterable of (name, parameter) pairs (e.g.
            model.named_parameters()).
        lr, momentum, weight_decay, nesterov: standard SGD settings.
        bn_weight_decay: when False (default), parameters whose name
            contains "bn" get weight_decay 0.
    """
    # Bug fix: materialise first — `parameters` is often a generator
    # (named_parameters()), and the original iterated it twice, leaving the
    # second comprehension empty.
    named = list(parameters)
    if bn_weight_decay:
        print(" ! Weight decay applied to BN parameters ")
        params = [v for n, v in named]
    else:
        print(" ! Weight decay NOT applied to BN parameters ")
        bn_params = [v for n, v in named if "bn" in n]
        rest_params = [v for n, v in named if "bn" not in n]
        params = [
            {"params": bn_params, "weight_decay": 0},
            {"params": rest_params, "weight_decay": weight_decay},
        ]

    return torch.optim.SGD(
        params, lr, momentum=momentum, weight_decay=weight_decay, nesterov=nesterov
    )
def get_rmsprop_optimizer(
    parameters, lr, alpha, weight_decay, momentum, eps, bn_weight_decay=False
):
    """Build an RMSprop optimizer, optionally exempting BN parameters from
    weight decay.

    Args:
        parameters: iterable of (name, parameter) pairs.
        lr, alpha, weight_decay, momentum, eps: standard RMSprop settings.
        bn_weight_decay: when False (default), parameters whose name
            contains "bn" get weight_decay 0.
    """
    # Bug fix: materialise first — `parameters` is often a generator
    # (named_parameters()), and the original iterated it twice, leaving the
    # second comprehension empty.
    named = list(parameters)
    bn_params = [v for n, v in named if "bn" in n]
    rest_params = [v for n, v in named if "bn" not in n]

    params = [
        {"params": bn_params, "weight_decay": weight_decay if bn_weight_decay else 0},
        {"params": rest_params, "weight_decay": weight_decay},
    ]

    return torch.optim.RMSprop(
        params,
        lr=lr,
        alpha=alpha,
        weight_decay=weight_decay,
        momentum=momentum,
        eps=eps,
    )
def lr_policy(lr_fn, logger=None):
    """Wrap an ``(iteration, epoch) -> lr`` schedule into a setter.

    The returned callable computes the rate, writes it into every param
    group of the optimizer, and returns it.

    Args:
        lr_fn: schedule function of (iteration, epoch).
        logger: accepted for compatibility with callers that pass one
            (lr_exponential_policy calls lr_policy(_lr_fn, logger=logger),
            which previously raised TypeError); currently unused.
    """

    def _alr(optimizer, iteration, epoch):
        lr = lr_fn(iteration, epoch)

        for param_group in optimizer.param_groups:
            param_group["lr"] = lr
        return lr

    return _alr
def lr_step_policy(base_lr, steps, decay_factor, warmup_length):
    """Step-decay schedule: linear warmup for `warmup_length` epochs, then
    multiply the base rate by `decay_factor` once per boundary in `steps`
    that the current epoch has reached.
    """

    def _schedule(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length
        rate = base_lr
        for boundary in steps:
            if epoch >= boundary:
                rate *= decay_factor
        return rate

    return lr_policy(_schedule)
def
lr_linear_policy
(
base_lr
,
warmup_length
,
epochs
):
def
_lr_fn
(
iteration
,
epoch
):
if
epoch
<
warmup_length
:
lr
=
base_lr
*
(
epoch
+
1
)
/
warmup_length
else
:
e
=
epoch
-
warmup_length
es
=
epochs
-
warmup_length
lr
=
base_lr
*
(
1
-
(
e
/
es
))
return
lr
return
lr_policy
(
_lr_fn
)
def lr_cosine_policy(base_lr, warmup_length, epochs, end_lr=0):
    """Cosine-annealing schedule: linear warmup for `warmup_length` epochs,
    then a half-cosine decay from `base_lr` down to `end_lr` at `epochs`.
    """

    def _schedule(iteration, epoch):
        if epoch < warmup_length:
            return base_lr * (epoch + 1) / warmup_length
        progressed = epoch - warmup_length
        span = epochs - warmup_length
        return end_lr + (
            0.5 * (1 + np.cos(np.pi * progressed / span)) * (base_lr - end_lr)
        )

    return lr_policy(_schedule)
def lr_exponential_policy(
    base_lr,
    warmup_length,
    epochs,
    final_multiplier=0.001,
    decay_factor=None,
    decay_step=1,
    logger=None,
):
    """Exponential lr policy. Setting decay factor parameter overrides final_multiplier.

    Linear warmup for `warmup_length` epochs, then the rate decays by
    `epoch_decay` every `decay_step` epochs. `logger` is retained for
    interface compatibility but not used.
    """
    es = epochs - warmup_length
    if decay_factor is not None:
        epoch_decay = decay_factor
    else:
        # Choose the per-step decay so the rate reaches
        # base_lr * final_multiplier by the last decay step.
        epoch_decay = np.power(
            2, np.log2(final_multiplier) / math.floor(es / decay_step)
        )

    def _lr_fn(iteration, epoch):
        if epoch < warmup_length:
            lr = base_lr * (epoch + 1) / warmup_length
        else:
            e = epoch - warmup_length
            lr = base_lr * (epoch_decay ** math.floor(e / decay_step))
        return lr

    # Bug fix: lr_policy takes no `logger` keyword, so the original call
    # lr_policy(_lr_fn, logger=logger) raised TypeError.
    return lr_policy(_lr_fn)
image_classification/quantization.py
0 → 100644
View file @
e129194a
from
tqdm
import
tqdm
import
torch
import
contextlib
import
time
import
logging
from
pytorch_quantization
import
quant_modules
from
pytorch_quantization
import
nn
as
quant_nn
from
pytorch_quantization
import
calib
from
pytorch_quantization.tensor_quant
import
QuantDescriptor
from
.
import
logger
as
log
from
.utils
import
calc_ips
import
dllogger
# Short aliases for the pytorch-quantization global monkey-patch toggles.
initialize = quant_modules.initialize
deactivate = quant_modules.deactivate

# dllogger metric metadata used by collect_stats.
IPS_METADATA = {"unit": "img/s", "format": ":.2f"}
TIME_METADATA = {"unit": "s", "format": ":.5f"}
def select_default_calib_method(calib_method='histogram'):
    """Install `calib_method` as the default input calibrator for every
    supported quantized layer type in the whole network.
    """
    default_desc = QuantDescriptor(calib_method=calib_method)
    for layer_cls in (
        quant_nn.QuantConv1d,
        quant_nn.QuantConv2d,
        quant_nn.QuantLinear,
        quant_nn.QuantAdaptiveAvgPool2d,
    ):
        layer_cls.set_default_quant_desc_input(default_desc)
def quantization_setup(calib_method='histogram'):
    """Change network into quantized version "automatically" and selects histogram as default quantization method"""
    # Select the calibrator first, then monkey-patch nn layers globally.
    select_default_calib_method(calib_method)
    initialize()
def disable_calibration(model):
    """Switch every TensorQuantizer in `model` from calibration mode back to
    quantization mode. Run this before inference (after collecting stats).
    """
    for _name, module in model.named_modules():
        if not isinstance(module, quant_nn.TensorQuantizer):
            continue
        if module._calibrator is None:
            module.enable()
        else:
            module.enable_quant()
            module.disable_calib()
def collect_stats(model, data_loader, logger, num_batches):
    """Feed data to the network and collect statistic

    Puts every TensorQuantizer into calibration mode, runs up to
    ``num_batches`` batches through the model, logs throughput/latency
    metrics (if a logger is given), then restores quantization mode.
    """
    if logger is not None:
        logger.register_metric(
            f"calib.total_ips",
            log.PERF_METER(),
            verbosity=dllogger.Verbosity.DEFAULT,
            metadata=IPS_METADATA,
        )
        logger.register_metric(
            f"calib.data_time",
            log.PERF_METER(),
            verbosity=dllogger.Verbosity.DEFAULT,
            metadata=TIME_METADATA,
        )
        logger.register_metric(
            f"calib.compute_latency",
            log.PERF_METER(),
            verbosity=dllogger.Verbosity.DEFAULT,
            metadata=TIME_METADATA,
        )

    data_iter = enumerate(data_loader)
    if logger is not None:
        data_iter = logger.iteration_generator_wrapper(data_iter, mode='calib')

    # Enable calibrators: collect statistics instead of quantizing.
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer):
            if module._calibrator is not None:
                module.disable_quant()
                module.enable_calib()
            else:
                module.disable()

    end = time.time()

    if logger is not None:
        logger.start_calibration()

    for i, (image, _) in data_iter:
        bs = image.size(0)
        data_time = time.time() - end
        model(image.cuda())
        it_time = time.time() - end

        if logger is not None:
            logger.log_metric(f"calib.total_ips", calc_ips(bs, it_time))
            logger.log_metric(f"calib.data_time", data_time)
            logger.log_metric(f"calib.compute_latency", it_time - data_time)

        if i >= num_batches:
            # presumably the sleep lets async logging/CUDA work drain — TODO confirm
            time.sleep(5)
            break

        end = time.time()

    if logger is not None:
        logger.end_calibration()

    # Suppress the warnings emitted while loading calibrated amax values.
    logging.disable(logging.WARNING)
    disable_calibration(model)
def compute_amax(model, **kwargs):
    """Loads statistics of data and calculates quantization parameters in whole network

    kwargs (e.g. method="percentile", percentile=...) are forwarded to
    load_calib_amax for non-max calibrators; MaxCalibrator takes none.
    """
    for name, module in model.named_modules():
        if isinstance(module, quant_nn.TensorQuantizer) and module._calibrator is not None:
            if isinstance(module._calibrator, calib.MaxCalibrator):
                module.load_calib_amax()
            else:
                module.load_calib_amax(**kwargs)
    # Move back to GPU: the newly-created amax buffers start on CPU.
    model.cuda()
def calibrate(model, train_loader, logger, calib_iter=1, percentile=99.99):
    """Calibrates whole network i.e. gathers data for quantization and calculates quantization parameters

    Runs ``calib_iter`` batches of ``train_loader`` through the model in
    eval/no-grad mode, then computes percentile-based amax values.
    """
    model.eval()

    with torch.no_grad():
        collect_stats(model, train_loader, logger, num_batches=calib_iter)
        compute_amax(model, method="percentile", percentile=percentile)

    # Re-enable logging that collect_stats disabled.
    logging.disable(logging.NOTSET)
@contextlib.contextmanager
def switch_on_quantization(do_quantization=True):
    """Context manager for quantization activation

    While active (and ``do_quantization`` is True), nn layers are globally
    monkey-patched to their quantized counterparts; the patch is reverted
    on exit even if the body raises.
    """
    if do_quantization:
        initialize()
    try:
        yield
    finally:
        if do_quantization:
            deactivate()
image_classification/smoothing.py
0 → 100644
View file @
e129194a
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the BSD 3-Clause License (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://opensource.org/licenses/BSD-3-Clause
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
torch
import
torch.nn
as
nn
class LabelSmoothing(nn.Module):
    """Negative log-likelihood loss with label smoothing.

    The per-sample loss is a convex combination of the NLL of the target
    class (weight ``1 - smoothing``) and the mean negative log-probability
    over all classes (weight ``smoothing``), averaged over the batch.
    """

    def __init__(self, smoothing=0.0):
        """
        Args:
            smoothing: label smoothing factor in [0, 1).
        """
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, x, target):
        log_probs = torch.nn.functional.log_softmax(x, dim=-1)
        # Log-probability assigned to the target class of each sample.
        picked = log_probs.gather(dim=-1, index=target.unsqueeze(1)).squeeze(1)
        # Mean log-probability over all classes (uniform-target term).
        uniform = log_probs.mean(dim=-1)
        per_sample = -(self.confidence * picked + self.smoothing * uniform)
        return per_sample.mean()
image_classification/training.py
0 → 100644
View file @
e129194a
# Copyright (c) 2018-2019, NVIDIA CORPORATION
# Copyright (c) 2017- Facebook, Inc
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import
time
from
copy
import
deepcopy
from
functools
import
wraps
from
typing
import
Callable
,
Dict
,
Optional
,
Tuple
import
torch
import
torch.nn
as
nn
from
torch.cuda.amp
import
autocast
from
torch.nn.parallel
import
DistributedDataParallel
as
DDP
from
.
import
logger
as
log
from
.
import
utils
from
.logger
import
TrainingMetrics
,
ValidationMetrics
from
.models.common
import
EMA
class
Executor
:
def
__init__
(
self
,
model
:
nn
.
Module
,
loss
:
Optional
[
nn
.
Module
],
cuda
:
bool
=
True
,
memory_format
:
torch
.
memory_format
=
torch
.
contiguous_format
,
amp
:
bool
=
False
,
scaler
:
Optional
[
torch
.
cuda
.
amp
.
GradScaler
]
=
None
,
divide_loss
:
int
=
1
,
ts_script
:
bool
=
False
,
):
assert
not
(
amp
and
scaler
is
None
),
"Gradient Scaler is needed for AMP"
def
xform
(
m
:
nn
.
Module
)
->
nn
.
Module
:
if
cuda
:
m
=
m
.
cuda
()
m
.
to
(
memory_format
=
memory_format
)
return
m
self
.
model
=
xform
(
model
)
if
ts_script
:
self
.
model
=
torch
.
jit
.
script
(
self
.
model
)
self
.
ts_script
=
ts_script
self
.
loss
=
xform
(
loss
)
if
loss
is
not
None
else
None
self
.
amp
=
amp
self
.
scaler
=
scaler
self
.
is_distributed
=
False
self
.
divide_loss
=
divide_loss
self
.
_fwd_bwd
=
None
self
.
_forward
=
None
def
distributed
(
self
,
gpu_id
):
self
.
is_distributed
=
True
s
=
torch
.
cuda
.
Stream
()
s
.
wait_stream
(
torch
.
cuda
.
current_stream
())
with
torch
.
cuda
.
stream
(
s
):
self
.
model
=
DDP
(
self
.
model
,
device_ids
=
[
gpu_id
],
output_device
=
gpu_id
)
torch
.
cuda
.
current_stream
().
wait_stream
(
s
)
def
_fwd_bwd_fn
(
self
,
input
:
torch
.
Tensor
,
target
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
with
autocast
(
enabled
=
self
.
amp
):
loss
=
self
.
loss
(
self
.
model
(
input
),
target
)
loss
/=
self
.
divide_loss
self
.
scaler
.
scale
(
loss
).
backward
()
return
loss
def
_forward_fn
(
self
,
input
:
torch
.
Tensor
,
target
:
torch
.
Tensor
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
with
torch
.
no_grad
(),
autocast
(
enabled
=
self
.
amp
):
output
=
self
.
model
(
input
)
loss
=
None
if
self
.
loss
is
None
else
self
.
loss
(
output
,
target
)
return
output
if
loss
is
None
else
loss
,
output
def
optimize
(
self
,
fn
):
return
fn
@
property
def
forward_backward
(
self
):
if
self
.
_fwd_bwd
is
None
:
if
self
.
loss
is
None
:
raise
NotImplementedError
(
"Loss must not be None for forward+backward step"
)
self
.
_fwd_bwd
=
self
.
optimize
(
self
.
_fwd_bwd_fn
)
return
self
.
_fwd_bwd
@property
def forward(self):
    """Lazily-built inference step."""
    if self._forward is not None:
        return self._forward
    self._forward = self.optimize(self._forward_fn)
    return self._forward
def train(self):
    """Switch the model, and the loss module when present, to training mode."""
    targets = [self.model] if self.loss is None else [self.model, self.loss]
    for module in targets:
        module.train()
def eval(self):
    """Switch the model, and the loss module when present, to eval mode."""
    targets = [self.model] if self.loss is None else [self.model, self.loss]
    for module in targets:
        module.eval()
class Trainer:
    """Drives optimization on top of an Executor.

    Adds gradient accumulation, optional AMP GradScaler stepping, and an
    optional exponential-moving-average (EMA) copy of the model.
    """

    def __init__(
        self,
        executor: Executor,
        optimizer: torch.optim.Optimizer,
        grad_acc_steps: int,
        ema: Optional[float] = None,
    ):
        # executor: owns the model/loss and the forward/backward callables.
        # grad_acc_steps: optimizer.step() is taken once per this many
        #   train_step() calls (gradient accumulation).
        # ema: EMA decay factor; when given, a deep copy of the executor is
        #   kept and updated after every training step.
        self.executor = executor
        self.optimizer = optimizer
        self.grad_acc_steps = grad_acc_steps
        self.use_ema = False
        if ema is not None:
            self.ema_executor = deepcopy(self.executor)
            self.ema = EMA(ema, self.ema_executor.model)
            self.use_ema = True

        self.optimizer.zero_grad(set_to_none=True)
        # Counts forward/backward calls since the last optimizer step.
        self.steps_since_update = 0

    def train(self):
        """Put the model (and the EMA copy, if any) into training mode."""
        self.executor.train()
        if self.use_ema:
            self.ema_executor.train()

    def eval(self):
        """Put the model (and the EMA copy, if any) into eval mode."""
        self.executor.eval()
        if self.use_ema:
            self.ema_executor.eval()

    def train_step(self, input, target, step=None):
        """One forward/backward; steps the optimizer every grad_acc_steps calls.

        Returns the loss tensor produced by the executor.
        """
        loss = self.executor.forward_backward(input, target)

        self.steps_since_update += 1

        if self.steps_since_update == self.grad_acc_steps:
            if self.executor.scaler is not None:
                # AMP path: scaler.step skips the update on inf/nan grads,
                # then update() adjusts the scale factor.
                self.executor.scaler.step(self.optimizer)
                self.executor.scaler.update()
            else:
                self.optimizer.step()
            self.optimizer.zero_grad()
            self.steps_since_update = 0

        # NOTE(review): full device sync every step — presumably for accurate
        # step timing by the caller; confirm this is intended, it costs perf.
        torch.cuda.synchronize()

        if self.use_ema:
            self.ema(self.executor.model, step=step)

        return loss

    def validation_steps(self) -> Dict[str, Callable]:
        """Map of name -> inference callable; adds "val_ema" when EMA is on."""
        vsd: Dict[str, Callable] = {"val": self.executor.forward}
        if self.use_ema:
            vsd["val_ema"] = self.ema_executor.forward
        return vsd

    def state_dict(self) -> dict:
        """Checkpoint payload: model + optimizer (+ EMA model when enabled)."""
        res = {
            "state_dict": self.executor.model.state_dict(),
            "optimizer": self.optimizer.state_dict(),
        }
        if self.use_ema:
            res["state_dict_ema"] = self.ema_executor.model.state_dict()

        return res
def train(
    train_step,
    train_loader,
    lr_scheduler,
    grad_scale_fn,
    log_fn,
    timeout_handler,
    prof=-1,
    step=0,
):
    """Run one epoch of training over ``train_loader``.

    train_step: callable(input, target, step=...) -> loss tensor.
    lr_scheduler: callable(iteration) -> current learning rate.
    grad_scale_fn: callable() -> current AMP gradient scale (for logging).
    prof: if > 0, stop after this many iterations (profiling mode).
    step: global step offset added to the per-epoch iteration index.

    Returns True when interrupted by the timeout handler, else False.
    """
    interrupted = False
    end = time.time()
    data_iter = enumerate(train_loader)

    for i, (input, target) in data_iter:
        bs = input.size(0)
        lr = lr_scheduler(i)
        # Everything since the previous iteration finished counts as
        # data-loading time.
        data_time = time.time() - end

        loss = train_step(input, target, step=step + i)
        it_time = time.time() - end

        with torch.no_grad():
            # Average the loss across workers only when running distributed.
            if torch.distributed.is_initialized():
                reduced_loss = utils.reduce_tensor(loss.detach())
            else:
                reduced_loss = loss.detach()

        log_fn(
            compute_ips=utils.calc_ips(bs, it_time - data_time),
            total_ips=utils.calc_ips(bs, it_time),
            data_time=data_time,
            compute_time=it_time - data_time,
            lr=lr,
            loss=reduced_loss.item(),
            grad_scale=grad_scale_fn(),
        )

        end = time.time()
        # Profiling mode: bail out after `prof` iterations.
        if prof > 0 and (i + 1 >= prof):
            time.sleep(5)
            break
        # Poll the SIGTERM flag every 20 iterations (the check may involve a
        # distributed broadcast, so it is not done every step).
        if ((i + 1) % 20 == 0) and timeout_handler.interrupted:
            time.sleep(5)
            interrupted = True
            break

    return interrupted
def validate(infer_fn, val_loader, log_fn, prof=-1, with_loss=True, topk=5):
    """Run one pass of evaluation over ``val_loader``.

    infer_fn: callable returning (loss, output) when with_loss, else output.
    prof: if > 0, stop after this many iterations (profiling mode).
    topk: the second accuracy cutoff logged alongside top-1.

    Returns top1.get_val() — the accumulated top-1 statistic.
    """
    top1 = log.AverageMeter()
    # switch to evaluate mode
    end = time.time()

    data_iter = enumerate(val_loader)

    for i, (input, target) in data_iter:
        bs = input.size(0)
        data_time = time.time() - end

        if with_loss:
            loss, output = infer_fn(input, target)
        else:
            output = infer_fn(input)

        with torch.no_grad():
            precs = utils.accuracy(output.data, target, topk=(1, topk))

            # Average loss/accuracies across workers when distributed.
            if torch.distributed.is_initialized():
                if with_loss:
                    reduced_loss = utils.reduce_tensor(loss.detach())
                precs = map(utils.reduce_tensor, precs)
            else:
                if with_loss:
                    reduced_loss = loss.detach()

        precs = map(lambda t: t.item(), precs)
        # Each metric is recorded as a (value, batch_size) pair so the logger
        # can weight by batch size.
        infer_result = {f"top{k}": (p, bs) for k, p in zip((1, topk), precs)}

        if with_loss:
            infer_result["loss"] = (reduced_loss.item(), bs)

        torch.cuda.synchronize()

        it_time = time.time() - end

        top1.record(infer_result["top1"][0], bs)

        log_fn(
            compute_ips=utils.calc_ips(bs, it_time - data_time),
            total_ips=utils.calc_ips(bs, it_time),
            data_time=data_time,
            compute_time=it_time - data_time,
            **infer_result,
        )

        end = time.time()
        # Profiling mode: bail out after `prof` iterations.
        if (prof > 0) and (i + 1 >= prof):
            time.sleep(5)
            break

    return top1.get_val()
# Train loop {{{
def train_loop(
    trainer: Trainer,
    lr_scheduler,
    train_loader,
    train_loader_len,
    val_loader,
    logger,
    best_prec1=0,
    start_epoch=0,
    end_epoch=0,
    early_stopping_patience=-1,
    prof=-1,
    skip_training=False,
    skip_validation=False,
    save_checkpoints=True,
    checkpoint_dir="./",
    checkpoint_filename="checkpoint.pth.tar",
    keep_last_n_checkpoints=0,
    topk=5,
):
    """Full training driver: epochs of train + validate, checkpointing,
    early stopping, and SIGTERM-triggered graceful shutdown.

    lr_scheduler: callable(optimizer, iteration, epoch) -> learning rate.
    early_stopping_patience: stop after this many epochs without a new best
        top-1; <= 0 disables early stopping.
    prof: forwarded to train()/validate() to cap iterations per epoch.
    """
    checkpointer = utils.Checkpointer(
        last_filename=checkpoint_filename,
        checkpoint_dir=checkpoint_dir,
        keep_last_n=keep_last_n_checkpoints,
    )
    train_metrics = TrainingMetrics(logger)
    # One metrics tracker per validation stream (e.g. "val" and "val_ema").
    val_metrics = {
        k: ValidationMetrics(logger, k, topk)
        for k in trainer.validation_steps().keys()
    }
    training_step = trainer.train_step

    prec1 = -1

    if early_stopping_patience > 0:
        epochs_since_improvement = 0

    print(f"RUNNING EPOCHS FROM {start_epoch} TO {end_epoch}")
    with utils.TimeoutHandler() as timeout_handler:
        interrupted = False
        for epoch in range(start_epoch, end_epoch):
            if logger is not None:
                logger.start_epoch()
            if not skip_training:
                if logger is not None:
                    data_iter = logger.iteration_generator_wrapper(
                        train_loader, mode="train"
                    )
                else:
                    data_iter = train_loader

                trainer.train()
                interrupted = train(
                    training_step,
                    data_iter,
                    lambda i: lr_scheduler(trainer.optimizer, i, epoch),
                    trainer.executor.scaler.get_scale,
                    train_metrics.log,
                    timeout_handler,
                    prof=prof,
                    step=epoch * train_loader_len,
                )

            if not skip_validation:
                trainer.eval()
                for k, infer_fn in trainer.validation_steps().items():
                    if logger is not None:
                        data_iter = logger.iteration_generator_wrapper(
                            val_loader, mode="val"
                        )
                    else:
                        data_iter = val_loader

                    step_prec1, _ = validate(
                        infer_fn,
                        data_iter,
                        val_metrics[k].log,
                        prof=prof,
                        topk=topk,
                    )

                    # Only the plain "val" stream drives best-model tracking.
                    if k == "val":
                        prec1 = step_prec1

                if prec1 > best_prec1:
                    is_best = True
                    best_prec1 = prec1
                else:
                    is_best = False
            else:
                is_best = False
                best_prec1 = 0

            if logger is not None:
                logger.end_epoch()

            # Only rank 0 (or a non-distributed run) writes checkpoints.
            if save_checkpoints and (
                not torch.distributed.is_initialized()
                or torch.distributed.get_rank() == 0
            ):
                checkpoint_state = {
                    "epoch": epoch + 1,
                    "best_prec1": best_prec1,
                    **trainer.state_dict(),
                }
                checkpointer.save_checkpoint(
                    checkpoint_state,
                    is_best,
                    filename=f"checkpoint_{epoch:04}.pth.tar",
                )

            if early_stopping_patience > 0:
                if not is_best:
                    epochs_since_improvement += 1
                else:
                    epochs_since_improvement = 0
                if epochs_since_improvement >= early_stopping_patience:
                    break
            if interrupted:
                break
# }}}
image_classification/utils.py
0 → 100644
View file @
e129194a
# Copyright (c) 2018-2019, NVIDIA CORPORATION
# Copyright (c) 2017- Facebook, Inc
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import functools
import itertools
import math
import os
import shutil
import signal
import time

import numpy as np
import torch
import torch.distributed as dist
class Checkpointer:
    """Writes training checkpoints and prunes old ones.

    Keeps at most ``keep_last_n`` historical checkpoints (0 keeps all),
    always refreshes ``last_filename``, and copies the best checkpoint to
    ``model_best.pth.tar``.
    """

    def __init__(self, last_filename, checkpoint_dir="./", keep_last_n=0):
        self.last_filename = last_filename
        self.checkpoints = []
        self.checkpoint_dir = checkpoint_dir
        self.keep_last_n = keep_last_n

    def cleanup(self):
        """Delete checkpoint files beyond the most recent ``keep_last_n``."""
        # Note: with keep_last_n == 0 the slices keep everything, so this is
        # a no-op in that case — same as the original behavior.
        stale = self.checkpoints[: -self.keep_last_n]
        self.checkpoints = self.checkpoints[-self.keep_last_n :]
        for name in stale:
            os.remove(os.path.join(self.checkpoint_dir, name))

    def get_full_path(self, filename):
        """Return ``filename`` joined onto the checkpoint directory."""
        return os.path.join(self.checkpoint_dir, filename)

    def save_checkpoint(
        self,
        state,
        is_best,
        filename,
    ):
        """Persist ``state`` as ``filename``; must only run on rank 0."""
        if torch.distributed.is_initialized() and torch.distributed.get_rank() != 0:
            assert False

        full_path = self.get_full_path(filename)
        print("SAVING {}".format(full_path))
        torch.save(state, full_path)
        self.checkpoints.append(filename)

        # Refresh the "latest" alias, and the "best" alias when applicable.
        shutil.copyfile(full_path, self.get_full_path(self.last_filename))
        if is_best:
            shutil.copyfile(full_path, self.get_full_path("model_best.pth.tar"))

        self.cleanup()
def timed_generator(gen):
    """Yield ``(item, seconds)`` pairs, timing how long each item took to arrive."""
    tick = time.time()
    for item in gen:
        elapsed = time.time() - tick
        yield item, elapsed
        # Restart the clock only after the consumer resumes us, so consumer
        # time is not billed to the generator.
        tick = time.time()
def timed_function(f):
    """Decorator: wrap ``f`` so each call returns ``(result, elapsed_seconds)``."""

    # functools.wraps preserves f's __name__/__doc__ on the wrapper, so
    # logging and debugging see the real function instead of _timed_function.
    @functools.wraps(f)
    def _timed_function(*args, **kwargs):
        start = time.time()
        ret = f(*args, **kwargs)
        return ret, time.time() - start

    return _timed_function
def accuracy(output, target, topk=(1,)):
    """Return precision@k (as percentage tensors) for each k in ``topk``."""
    max_k = max(topk)
    n = target.size(0)

    # Predicted class indices for the max_k highest scores, one column per
    # sample after the transpose.
    _, pred = output.topk(max_k, 1, True, True)
    pred = pred.t()
    hits = pred.eq(target.view(1, -1).expand_as(pred))

    # A sample counts as correct at k if the target is anywhere in its top-k.
    return [hits[:k].float().sum().mul_(100.0 / n) for k in topk]
def reduce_tensor(tensor):
    """Average ``tensor`` across all distributed workers.

    Returns a detached clone; the input is never modified. When
    torch.distributed is not initialized (single-process run) the clone is
    returned unchanged — the original unconditionally called
    ``dist.all_reduce`` and crashed in that case even though the division
    below was already guarded.
    """
    rt = tensor.clone().detach()
    if torch.distributed.is_initialized():
        dist.all_reduce(rt, op=dist.ReduceOp.SUM)
        rt /= torch.distributed.get_world_size()
    return rt
def first_n(n, generator):
    """Yield at most the first ``n`` items of ``generator``.

    ``n <= 0`` yields nothing and consumes nothing from the generator
    (matching the original ``zip(range(n), ...)`` behavior). Uses the
    stdlib ``itertools.islice`` instead of a hand-rolled zip-with-range.
    """
    yield from itertools.islice(generator, max(n, 0))
class TimeoutHandler:
    """Context manager turning a signal (SIGTERM by default) into a poll-able flag.

    Rank 0 records the interruption; other ranks ignore the signal and learn
    about it through the broadcast in ``interrupted``.
    """

    def __init__(self, sig=signal.SIGTERM):
        self.sig = sig
        self.device = torch.device("cuda")

    @property
    def interrupted(self):
        """True once the signal was received (broadcast from rank 0 when distributed)."""
        if not dist.is_initialized():
            return self._interrupted
        # Share rank 0's flag with every worker so they all stop together.
        flag = torch.tensor(self._interrupted).int().to(self.device)
        dist.broadcast(flag, 0)
        return bool(flag.item())

    def __enter__(self):
        self._interrupted = False
        self.released = False
        self.original_handler = signal.getsignal(self.sig)

        def master_handler(signum, frame):
            self.release()
            self._interrupted = True
            print("Received SIGTERM")

        def ignoring_handler(signum, frame):
            self.release()
            print("Received SIGTERM, ignoring")

        on_master = (dist.get_rank() == 0) if dist.is_initialized() else True
        signal.signal(self.sig, master_handler if on_master else ignoring_handler)
        return self

    def __exit__(self, type, value, tb):
        self.release()

    def release(self):
        """Restore the original signal handler; False if already released."""
        if self.released:
            return False
        signal.signal(self.sig, self.original_handler)
        self.released = True
        return True
def calc_ips(batch_size, time):
    """Throughput in images/sec across all workers: (world_size * batch) / time."""
    if torch.distributed.is_initialized():
        world_size = torch.distributed.get_world_size()
    else:
        world_size = 1
    return (world_size * batch_size) / time
Prev
1
2
3
4
5
6
7
8
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment