Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
InstructBLIP_pytorch
Commits
c04f261a
Commit
c04f261a
authored
Aug 22, 2024
by
dongchy920
Browse files
InstructBLIP
parents
Pipeline
#1594
canceled with stages
Changes
421
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
3298 additions
and
0 deletions
+3298
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
...n/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
+58
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
...on/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
+78
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
...n/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
+82
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
...mmon/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
+117
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
...notator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
+57
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/text.py
...mmon/annotator/uniformer/mmcv/runner/hooks/logger/text.py
+256
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
...mon/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
+56
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
...ommon/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
+670
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/memory.py
lavis/common/annotator/uniformer/mmcv/runner/hooks/memory.py
+25
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
...annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
+493
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/optimizer.py
...common/annotator/uniformer/mmcv/runner/hooks/optimizer.py
+508
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/profiler.py
.../common/annotator/uniformer/mmcv/runner/hooks/profiler.py
+180
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
...mon/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
+20
-0
lavis/common/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
...mmon/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
+22
-0
lavis/common/annotator/uniformer/mmcv/runner/iter_based_runner.py
...mmon/annotator/uniformer/mmcv/runner/iter_based_runner.py
+273
-0
lavis/common/annotator/uniformer/mmcv/runner/log_buffer.py
lavis/common/annotator/uniformer/mmcv/runner/log_buffer.py
+41
-0
lavis/common/annotator/uniformer/mmcv/runner/optimizer/__init__.py
...mon/annotator/uniformer/mmcv/runner/optimizer/__init__.py
+9
-0
lavis/common/annotator/uniformer/mmcv/runner/optimizer/builder.py
...mmon/annotator/uniformer/mmcv/runner/optimizer/builder.py
+44
-0
lavis/common/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
...or/uniformer/mmcv/runner/optimizer/default_constructor.py
+249
-0
lavis/common/annotator/uniformer/mmcv/runner/priority.py
lavis/common/annotator/uniformer/mmcv/runner/priority.py
+60
-0
No files found.
Too many changes to show.
To preserve performance only
421 of 421+
files are displayed.
Plain diff
Email patch
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/dvclive.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class DvcliveLoggerHook(LoggerHook):
    """Class to log metrics with dvclive.

    It requires `dvclive`_ to be installed.

    Args:
        path (str): Directory where dvclive will write TSV log files.
        interval (int): Logging interval (every k iterations).
            Default 10.
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`. Default: True.
        reset_flag (bool): Whether to clear the output buffer after logging.
            Default: True.
        by_epoch (bool): Whether EpochBasedRunner is used. Default: True.

    .. _dvclive:
        https://dvc.org/doc/dvclive
    """

    def __init__(self,
                 path,
                 interval=10,
                 ignore_last=True,
                 reset_flag=True,
                 by_epoch=True):
        super(DvcliveLoggerHook, self).__init__(interval, ignore_last,
                                                reset_flag, by_epoch)
        self.path = path
        # Fail fast at construction time if dvclive is missing.
        self.import_dvclive()

    def import_dvclive(self):
        # Lazy import keeps dvclive an optional dependency of the package.
        try:
            import dvclive
        except ImportError:
            raise ImportError(
                'Please run "pip install dvclive" to install dvclive')
        self.dvclive = dvclive

    @master_only
    def before_run(self, runner):
        # Point dvclive at the configured output directory.
        self.dvclive.init(self.path)

    @master_only
    def log(self, runner):
        tags = self.get_loggable_tags(runner)
        if not tags:
            return
        for tag_name, tag_value in tags.items():
            self.dvclive.log(tag_name, tag_value, step=self.get_iter(runner))
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/mlflow.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class MlflowLoggerHook(LoggerHook):

    def __init__(self,
                 exp_name=None,
                 tags=None,
                 log_model=True,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True):
        """Class to log metrics and (optionally) a trained model to MLflow.

        It requires `MLflow`_ to be installed.

        Args:
            exp_name (str, optional): Name of the experiment to be used.
                Default None.
                If not None, set the active experiment.
                If experiment does not exist, an experiment with provided name
                will be created.
            tags (dict of str: str, optional): Tags for the current run.
                Default None.
                If not None, set tags for the current run.
            log_model (bool, optional): Whether to log an MLflow artifact.
                Default True.
                If True, log runner.model as an MLflow artifact
                for the current run.
            interval (int): Logging interval (every k iterations).
            ignore_last (bool): Ignore the log of last iterations in each epoch
                if less than `interval`.
            reset_flag (bool): Whether to clear the output buffer after logging
            by_epoch (bool): Whether EpochBasedRunner is used.

        .. _MLflow:
            https://www.mlflow.org/docs/latest/index.html
        """
        super(MlflowLoggerHook, self).__init__(interval, ignore_last,
                                               reset_flag, by_epoch)
        # Import eagerly so a missing dependency surfaces at construction
        # time rather than mid-training.
        self.import_mlflow()
        self.exp_name = exp_name
        self.tags = tags
        self.log_model = log_model

    def import_mlflow(self):
        # Lazy import keeps mlflow an optional dependency; both the core
        # module and the pytorch flavor are stored for later use.
        try:
            import mlflow
            import mlflow.pytorch as mlflow_pytorch
        except ImportError:
            raise ImportError(
                'Please run "pip install mlflow" to install mlflow')
        self.mlflow = mlflow
        self.mlflow_pytorch = mlflow_pytorch

    @master_only
    def before_run(self, runner):
        super(MlflowLoggerHook, self).before_run(runner)
        # Configure the active experiment / run tags before any metric is
        # logged, but only when the user supplied them.
        if self.exp_name is not None:
            self.mlflow.set_experiment(self.exp_name)
        if self.tags is not None:
            self.mlflow.set_tags(self.tags)

    @master_only
    def log(self, runner):
        # Forward all loggable scalars in one batched call, keyed on the
        # current global iteration.
        tags = self.get_loggable_tags(runner)
        if tags:
            self.mlflow.log_metrics(tags, step=self.get_iter(runner))

    @master_only
    def after_run(self, runner):
        # Optionally store the final model as an MLflow artifact under the
        # 'models' artifact path.
        if self.log_model:
            self.mlflow_pytorch.log_model(runner.model, 'models')
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/neptune.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class NeptuneLoggerHook(LoggerHook):
    """Class to log metrics to NeptuneAI.

    It requires `neptune-client` to be installed.

    Args:
        init_kwargs (dict): a dict contains the initialization keys as below:

            - project (str): Name of a project in a form of
              namespace/project_name. If None, the value of
              NEPTUNE_PROJECT environment variable will be taken.
            - api_token (str): User's API token.
              If None, the value of NEPTUNE_API_TOKEN environment
              variable will be taken. Note: It is strongly recommended
              to use NEPTUNE_API_TOKEN environment variable rather than
              placing your API token in plain text in your source code.
            - name (str, optional, default is 'Untitled'): Editable name of
              the run. Name is displayed in the run's Details and in
              Runs table as a column.

            Check https://docs.neptune.ai/api-reference/neptune#init for
            more init arguments.
        interval (int): Logging interval (every k iterations).
        ignore_last (bool): Ignore the log of last iterations in each epoch
            if less than `interval`.
        reset_flag (bool): Whether to clear the output buffer after logging
        with_step (bool): If True, each metric is logged with an explicit
            ``step`` argument; otherwise the global step is logged as an
            ordinary field. Default: True.
        by_epoch (bool): Whether EpochBasedRunner is used.

    .. _NeptuneAI:
        https://docs.neptune.ai/you-should-know/logging-metadata
    """

    def __init__(self,
                 init_kwargs=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=True,
                 with_step=True,
                 by_epoch=True):
        super(NeptuneLoggerHook, self).__init__(interval, ignore_last,
                                                reset_flag, by_epoch)
        # Fail fast at construction time if neptune-client is missing.
        self.import_neptune()
        self.init_kwargs = init_kwargs
        self.with_step = with_step

    def import_neptune(self):
        # Lazy import keeps neptune-client an optional dependency.
        try:
            import neptune.new as neptune
        except ImportError:
            raise ImportError(
                'Please run "pip install neptune-client" to install neptune')
        self.neptune = neptune
        self.run = None

    @master_only
    def before_run(self, runner):
        # Open the Neptune run, forwarding any user-supplied init kwargs.
        if self.init_kwargs:
            self.run = self.neptune.init(**self.init_kwargs)
        else:
            self.run = self.neptune.init()

    @master_only
    def log(self, runner):
        tags = self.get_loggable_tags(runner)
        if tags:
            # BUG FIX: the previous implementation assigned
            # ``tags['global_step']`` inside the ``for ... in tags.items()``
            # loop, mutating the dict while iterating it, which raises
            # ``RuntimeError: dictionary changed size during iteration`` on
            # the first logged batch whenever ``with_step`` is False.
            # Insert the key exactly once, before iterating.
            if not self.with_step:
                tags['global_step'] = self.get_iter(runner)
            for tag_name, tag_value in tags.items():
                if self.with_step:
                    self.run[tag_name].log(
                        tag_value, step=self.get_iter(runner))
                else:
                    self.run[tag_name].log(tags)

    @master_only
    def after_run(self, runner):
        # Flush buffered metadata and close the Neptune run.
        self.run.stop()
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/pavi.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import os.path as osp

import torch
import yaml

import annotator.uniformer.mmcv as mmcv
from ....parallel.utils import is_module_wrapper
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class PaviLoggerHook(LoggerHook):
    """Logger hook that forwards metrics, graphs and checkpoints to Pavi.

    Requires the ``pavi`` SDK. Args mirror :class:`LoggerHook` plus:
        init_kwargs (dict, optional): kwargs forwarded to
            ``pavi.SummaryWriter``; 'name', 'model' and possibly
            'session_text' are filled in automatically in ``before_run``.
        add_graph (bool): Whether to log the model graph at epoch 0.
        add_last_ckpt (bool): Whether to upload the latest checkpoint
            after the run.
        img_key (str): Key in a dataloader batch used to fetch a sample
            input for graph tracing. Default: 'img_info'.
    """

    def __init__(self,
                 init_kwargs=None,
                 add_graph=False,
                 add_last_ckpt=False,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True,
                 img_key='img_info'):
        super(PaviLoggerHook, self).__init__(interval, ignore_last, reset_flag,
                                             by_epoch)
        self.init_kwargs = init_kwargs
        self.add_graph = add_graph
        self.add_last_ckpt = add_last_ckpt
        self.img_key = img_key

    @master_only
    def before_run(self, runner):
        """Create the Pavi ``SummaryWriter``, seeding it with run metadata."""
        super(PaviLoggerHook, self).before_run(runner)
        try:
            from pavi import SummaryWriter
        except ImportError:
            raise ImportError('Please run "pip install pavi" to install pavi.')
        # Use the last path component of the work dir as the run name.
        self.run_name = runner.work_dir.split('/')[-1]

        if not self.init_kwargs:
            self.init_kwargs = dict()
        self.init_kwargs['name'] = self.run_name
        self.init_kwargs['model'] = runner._model_name
        if runner.meta is not None:
            # Prefer an in-memory config dict; fall back to parsing the
            # config file path if that is all the runner meta provides.
            if 'config_dict' in runner.meta:
                config_dict = runner.meta['config_dict']
                assert isinstance(
                    config_dict,
                    dict), ('meta["config_dict"] has to be of a dict, '
                            f'but got {type(config_dict)}')
            elif 'config_file' in runner.meta:
                config_file = runner.meta['config_file']
                config_dict = dict(mmcv.Config.fromfile(config_file))
            else:
                config_dict = None
            if config_dict is not None:
                # 'max_.*iter' is parsed in pavi sdk as the maximum iterations
                # to properly set up the progress bar.
                config_dict = config_dict.copy()
                config_dict.setdefault('max_iter', runner.max_iters)
                # non-serializable values are first converted in
                # mmcv.dump to json
                config_dict = json.loads(
                    mmcv.dump(config_dict, file_format='json'))
                session_text = yaml.dump(config_dict)
                self.init_kwargs['session_text'] = session_text
        self.writer = SummaryWriter(**self.init_kwargs)

    def get_step(self, runner):
        """Get the total training step/epoch."""
        # Validation under epoch-based training is indexed by epoch;
        # everything else uses the global iteration counter.
        if self.get_mode(runner) == 'val' and self.by_epoch:
            return self.get_epoch(runner)
        else:
            return self.get_iter(runner)

    @master_only
    def log(self, runner):
        # Scalars are grouped under the current mode ('train'/'val') rather
        # than having the mode prefixed to each tag (add_mode=False).
        tags = self.get_loggable_tags(runner, add_mode=False)
        if tags:
            self.writer.add_scalars(
                self.get_mode(runner), tags, self.get_step(runner))

    @master_only
    def after_run(self, runner):
        """Optionally upload the last checkpoint, then close the writer."""
        if self.add_last_ckpt:
            ckpt_path = osp.join(runner.work_dir, 'latest.pth')
            # 'latest.pth' is often a symlink; resolve it to the real file.
            if osp.islink(ckpt_path):
                ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path))

            if osp.isfile(ckpt_path):
                # runner.epoch += 1 has been done before `after_run`.
                iteration = runner.epoch if self.by_epoch else runner.iter
                # NOTE(review): returning here skips writer.close();
                # presumably add_snapshot_file flushes/ends the task itself
                # — confirm against the pavi SDK.
                return self.writer.add_snapshot_file(
                    tag=self.run_name,
                    snapshot_file_path=ckpt_path,
                    iteration=iteration)

        # flush the buffer and send a task ending signal to Pavi
        self.writer.close()

    @master_only
    def before_epoch(self, runner):
        """Trace and upload the model graph once, before the first epoch."""
        if runner.epoch == 0 and self.add_graph:
            # Unwrap DataParallel/DistributedDataParallel style wrappers.
            if is_module_wrapper(runner.model):
                _model = runner.model.module
            else:
                _model = runner.model
            device = next(_model.parameters()).device
            data = next(iter(runner.data_loader))
            # Take a single-sample batch ([0:1]) from the dataloader output;
            # assumes data[self.img_key] is a tensor — TODO confirm.
            image = data[self.img_key][0:1].to(device)
            with torch.no_grad():
                self.writer.add_graph(_model, image)
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/tensorboard.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp

from annotator.uniformer.mmcv.utils import TORCH_VERSION, digit_version

from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class TensorboardLoggerHook(LoggerHook):
    """Logger hook writing scalars and text to TensorBoard.

    Args mirror :class:`LoggerHook` plus:
        log_dir (str, optional): Directory for event files. If None, a
            'tf_logs' subdirectory of ``runner.work_dir`` is used.
    """

    def __init__(self,
                 log_dir=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 by_epoch=True):
        super(TensorboardLoggerHook, self).__init__(interval, ignore_last,
                                                    reset_flag, by_epoch)
        self.log_dir = log_dir

    @master_only
    def before_run(self, runner):
        """Pick a SummaryWriter implementation and open it."""
        super(TensorboardLoggerHook, self).before_run(runner)
        # torch.utils.tensorboard only exists for PyTorch >= 1.1; older
        # versions (and the parrots fork) must use the tensorboardX package.
        if (TORCH_VERSION == 'parrots'
                or digit_version(TORCH_VERSION) < digit_version('1.1')):
            try:
                from tensorboardX import SummaryWriter
            except ImportError:
                raise ImportError('Please install tensorboardX to use '
                                  'TensorboardLoggerHook.')
        else:
            try:
                from torch.utils.tensorboard import SummaryWriter
            except ImportError:
                raise ImportError(
                    'Please run "pip install future tensorboard" to install '
                    'the dependencies to use torch.utils.tensorboard '
                    '(applicable to PyTorch 1.1 or higher)')

        if self.log_dir is None:
            self.log_dir = osp.join(runner.work_dir, 'tf_logs')
        self.writer = SummaryWriter(self.log_dir)

    @master_only
    def log(self, runner):
        # allow_text=True so string-valued tags are written via add_text.
        tags = self.get_loggable_tags(runner, allow_text=True)
        for tag, val in tags.items():
            if isinstance(val, str):
                self.writer.add_text(tag, val, self.get_iter(runner))
            else:
                self.writer.add_scalar(tag, val, self.get_iter(runner))

    @master_only
    def after_run(self, runner):
        # Flush pending events and release the writer.
        self.writer.close()
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/text.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import datetime
import os
import os.path as osp
from collections import OrderedDict

import torch
import torch.distributed as dist

import annotator.uniformer.mmcv as mmcv
from annotator.uniformer.mmcv.fileio.file_client import FileClient
from annotator.uniformer.mmcv.utils import is_tuple_of, scandir

from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class TextLoggerHook(LoggerHook):
    """Logger hook in text.

    In this logger hook, the information will be printed on terminal and
    saved in json file.

    Args:
        by_epoch (bool, optional): Whether EpochBasedRunner is used.
            Default: True.
        interval (int, optional): Logging interval (every k iterations).
            Default: 10.
        ignore_last (bool, optional): Ignore the log of last iterations in each
            epoch if less than :attr:`interval`. Default: True.
        reset_flag (bool, optional): Whether to clear the output buffer after
            logging. Default: False.
        interval_exp_name (int, optional): Logging interval for experiment
            name. This feature is to help users conveniently get the experiment
            information from screen or log file. Default: 1000.
        out_dir (str, optional): Logs are saved in ``runner.work_dir`` default.
            If ``out_dir`` is specified, logs will be copied to a new directory
            which is the concatenation of ``out_dir`` and the last level
            directory of ``runner.work_dir``. Default: None.
            `New in version 1.3.16.`
        out_suffix (str or tuple[str], optional): Those filenames ending with
            ``out_suffix`` will be copied to ``out_dir``.
            Default: ('.log.json', '.log', '.py').
            `New in version 1.3.16.`
        keep_local (bool, optional): Whether to keep local log when
            :attr:`out_dir` is specified. If False, the local log will be
            removed. Default: True.
            `New in version 1.3.16.`
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.
            `New in version 1.3.16.`
    """

    def __init__(self,
                 by_epoch=True,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 interval_exp_name=1000,
                 out_dir=None,
                 out_suffix=('.log.json', '.log', '.py'),
                 keep_local=True,
                 file_client_args=None):
        super(TextLoggerHook, self).__init__(interval, ignore_last, reset_flag,
                                             by_epoch)
        self.by_epoch = by_epoch
        self.time_sec_tot = 0
        self.interval_exp_name = interval_exp_name

        if out_dir is None and file_client_args is not None:
            # BUG FIX: the two adjacent literals previously concatenated to
            # "is notspecified." (missing space).
            raise ValueError(
                'file_client_args should be "None" when `out_dir` is not '
                'specified.')
        self.out_dir = out_dir

        if not (out_dir is None or isinstance(out_dir, str)
                or is_tuple_of(out_dir, str)):
            # BUG FIX: the second literal lacked the f-prefix, so the raw
            # text "{out_dir}" was printed instead of the offending value.
            raise TypeError('out_dir should be "None" or string or tuple of '
                            f'string, but got {out_dir}')
        self.out_suffix = out_suffix

        self.keep_local = keep_local
        self.file_client_args = file_client_args
        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(file_client_args,
                                                       self.out_dir)

    def before_run(self, runner):
        """Resolve the output directory and start the JSON log file."""
        super(TextLoggerHook, self).before_run(runner)

        if self.out_dir is not None:
            self.file_client = FileClient.infer_client(self.file_client_args,
                                                       self.out_dir)
            # The final `self.out_dir` is the concatenation of `self.out_dir`
            # and the last level directory of `runner.work_dir`
            basename = osp.basename(runner.work_dir.rstrip(osp.sep))
            self.out_dir = self.file_client.join_path(self.out_dir, basename)
            runner.logger.info(
                (f'Text logs will be saved to {self.out_dir} by '
                 f'{self.file_client.name} after the training process.'))

        self.start_iter = runner.iter
        self.json_log_path = osp.join(runner.work_dir,
                                      f'{runner.timestamp}.log.json')
        if runner.meta is not None:
            self._dump_log(runner.meta, runner)

    def _get_max_memory(self, runner):
        """Return the per-process peak CUDA memory in MB (max over ranks)."""
        device = getattr(runner.model, 'output_device', None)
        mem = torch.cuda.max_memory_allocated(device=device)
        # Integer MB; a tensor is used so it can be all-reduced below.
        mem_mb = torch.tensor([mem / (1024 * 1024)],
                              dtype=torch.int,
                              device=device)
        if runner.world_size > 1:
            dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
        return mem_mb.item()

    def _log_info(self, log_dict, runner):
        """Format ``log_dict`` into a single line and emit it to the logger."""
        # print exp name for users to distinguish experiments
        # at every ``interval_exp_name`` iterations and the end of each epoch
        if runner.meta is not None and 'exp_name' in runner.meta:
            if (self.every_n_iters(runner, self.interval_exp_name)) or (
                    self.by_epoch and self.end_of_epoch(runner)):
                exp_info = f'Exp name: {runner.meta["exp_name"]}'
                runner.logger.info(exp_info)

        if log_dict['mode'] == 'train':
            if isinstance(log_dict['lr'], dict):
                lr_str = []
                for k, val in log_dict['lr'].items():
                    lr_str.append(f'lr_{k}: {val:.3e}')
                lr_str = ' '.join(lr_str)
            else:
                lr_str = f'lr: {log_dict["lr"]:.3e}'

            # by epoch: Epoch [4][100/1000]
            # by iter:  Iter [100/100000]
            if self.by_epoch:
                log_str = f'Epoch [{log_dict["epoch"]}]' \
                          f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
            else:
                log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
            log_str += f'{lr_str}, '

            if 'time' in log_dict.keys():
                self.time_sec_tot += (log_dict['time'] * self.interval)
                time_sec_avg = self.time_sec_tot / (
                    runner.iter - self.start_iter + 1)
                eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
                eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
                log_str += f'eta: {eta_str}, '
                log_str += f'time: {log_dict["time"]:.3f}, ' \
                           f'data_time: {log_dict["data_time"]:.3f}, '
                # statistic memory
                if torch.cuda.is_available():
                    log_str += f'memory: {log_dict["memory"]}, '
        else:
            # val/test time
            # here 1000 is the length of the val dataloader
            # by epoch: Epoch[val] [4][1000]
            # by iter: Iter[val] [1000]
            if self.by_epoch:
                log_str = f'Epoch({log_dict["mode"]}) ' \
                    f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
            else:
                log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'

        log_items = []
        for name, val in log_dict.items():
            # TODO: resolve this hack
            # these items have been in log_str
            if name in [
                    'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time',
                    'memory', 'epoch'
            ]:
                continue
            if isinstance(val, float):
                val = f'{val:.4f}'
            log_items.append(f'{name}: {val}')
        log_str += ', '.join(log_items)

        runner.logger.info(log_str)

    def _dump_log(self, log_dict, runner):
        # dump log in json format
        json_log = OrderedDict()
        for k, v in log_dict.items():
            json_log[k] = self._round_float(v)
        # only append log at last line
        if runner.rank == 0:
            with open(self.json_log_path, 'a+') as f:
                mmcv.dump(json_log, f, file_format='json')
                f.write('\n')

    def _round_float(self, items):
        """Round floats (recursively through lists) to 5 decimal places."""
        if isinstance(items, list):
            return [self._round_float(item) for item in items]
        elif isinstance(items, float):
            return round(items, 5)
        else:
            return items

    def log(self, runner):
        """Assemble the log dict, print it and append it to the JSON log."""
        if 'eval_iter_num' in runner.log_buffer.output:
            # this doesn't modify runner.iter and is regardless of by_epoch
            cur_iter = runner.log_buffer.output.pop('eval_iter_num')
        else:
            cur_iter = self.get_iter(runner, inner_iter=True)

        log_dict = OrderedDict(
            mode=self.get_mode(runner),
            epoch=self.get_epoch(runner),
            iter=cur_iter)

        # only record lr of the first param group
        cur_lr = runner.current_lr()
        if isinstance(cur_lr, list):
            log_dict['lr'] = cur_lr[0]
        else:
            assert isinstance(cur_lr, dict)
            log_dict['lr'] = {}
            for k, lr_ in cur_lr.items():
                assert isinstance(lr_, list)
                log_dict['lr'].update({k: lr_[0]})

        if 'time' in runner.log_buffer.output:
            # statistic memory
            if torch.cuda.is_available():
                log_dict['memory'] = self._get_max_memory(runner)

        log_dict = dict(log_dict, **runner.log_buffer.output)

        self._log_info(log_dict, runner)
        self._dump_log(log_dict, runner)
        return log_dict

    def after_run(self, runner):
        # copy or upload logs to self.out_dir
        if self.out_dir is not None:
            for filename in scandir(runner.work_dir, self.out_suffix, True):
                local_filepath = osp.join(runner.work_dir, filename)
                out_filepath = self.file_client.join_path(
                    self.out_dir, filename)
                with open(local_filepath, 'r') as f:
                    self.file_client.put_text(f.read(), out_filepath)

                runner.logger.info(
                    (f'The file {local_filepath} has been uploaded to '
                     f'{out_filepath}.'))

                if not self.keep_local:
                    os.remove(local_filepath)
                    runner.logger.info(
                        (f'{local_filepath} was removed due to the '
                         '`self.keep_local=False`'))
lavis/common/annotator/uniformer/mmcv/runner/hooks/logger/wandb.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook


@HOOKS.register_module()
class WandbLoggerHook(LoggerHook):
    """Logger hook that mirrors training metrics to Weights & Biases."""

    def __init__(self,
                 init_kwargs=None,
                 interval=10,
                 ignore_last=True,
                 reset_flag=False,
                 commit=True,
                 by_epoch=True,
                 with_step=True):
        super(WandbLoggerHook, self).__init__(interval, ignore_last,
                                              reset_flag, by_epoch)
        # Import at construction time so a missing package fails early.
        self.import_wandb()
        self.init_kwargs = init_kwargs
        self.commit = commit
        self.with_step = with_step

    def import_wandb(self):
        # wandb is an optional dependency, imported lazily.
        try:
            import wandb
        except ImportError:
            raise ImportError(
                'Please run "pip install wandb" to install wandb')
        self.wandb = wandb

    @master_only
    def before_run(self, runner):
        super(WandbLoggerHook, self).before_run(runner)
        if self.wandb is None:
            self.import_wandb()
        # Start the run, honoring user-supplied init kwargs if any.
        if self.init_kwargs:
            self.wandb.init(**self.init_kwargs)
        else:
            self.wandb.init()

    @master_only
    def log(self, runner):
        tags = self.get_loggable_tags(runner)
        if not tags:
            return
        if self.with_step:
            # Let wandb index the metrics by the global iteration.
            self.wandb.log(
                tags, step=self.get_iter(runner), commit=self.commit)
        else:
            # Record the iteration as an ordinary metric instead.
            tags['global_step'] = self.get_iter(runner)
            self.wandb.log(tags, commit=self.commit)

    @master_only
    def after_run(self, runner):
        # Finish the run and wait for uploads to complete.
        self.wandb.join()
lavis/common/annotator/uniformer/mmcv/runner/hooks/lr_updater.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import numbers
from math import cos, pi

import annotator.uniformer.mmcv as mmcv

from .hook import HOOKS, Hook


class LrUpdaterHook(Hook):
    """LR Scheduler in MMCV.

    Args:
        by_epoch (bool): LR changes epoch by epoch
        warmup (string): Type of warmup used. It can be None(use no warmup),
            'constant', 'linear' or 'exp'
        warmup_iters (int): The number of iterations or epochs that warmup
            lasts
        warmup_ratio (float): LR used at the beginning of warmup equals to
            warmup_ratio * initial_lr
        warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters
            means the number of epochs that warmup lasts, otherwise means the
            number of iteration that warmup lasts
    """

    def __init__(self,
                 by_epoch=True,
                 warmup=None,
                 warmup_iters=0,
                 warmup_ratio=0.1,
                 warmup_by_epoch=False):
        # validate the "warmup" argument
        if warmup is not None:
            if warmup not in ['constant', 'linear', 'exp']:
                # BUG FIX: the message previously omitted 'exp' even though
                # it is accepted by the check above.
                raise ValueError(
                    f'"{warmup}" is not a supported type for warming up, valid'
                    ' types are "constant", "linear" and "exp"')
        if warmup is not None:
            assert warmup_iters > 0, \
                '"warmup_iters" must be a positive integer'
            assert 0 < warmup_ratio <= 1.0, \
                '"warmup_ratio" must be in range (0,1]'

        self.by_epoch = by_epoch
        self.warmup = warmup
        self.warmup_iters = warmup_iters
        self.warmup_ratio = warmup_ratio
        self.warmup_by_epoch = warmup_by_epoch

        if self.warmup_by_epoch:
            # warmup_iters is re-derived from epochs once the dataloader
            # length is known (see before_train_epoch).
            self.warmup_epochs = self.warmup_iters
            self.warmup_iters = None
        else:
            self.warmup_epochs = None

        self.base_lr = []  # initial lr for all param groups
        self.regular_lr = []  # expected lr if no warming up is performed

    def _set_lr(self, runner, lr_groups):
        """Write ``lr_groups`` into the optimizer(s)' param groups."""
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                for param_group, lr in zip(optim.param_groups, lr_groups[k]):
                    param_group['lr'] = lr
        else:
            for param_group, lr in zip(runner.optimizer.param_groups,
                                       lr_groups):
                param_group['lr'] = lr

    def get_lr(self, runner, base_lr):
        """Compute the lr for one param group; implemented by subclasses."""
        raise NotImplementedError

    def get_regular_lr(self, runner):
        """Return the scheduled (non-warmup) lr for every param group."""
        if isinstance(runner.optimizer, dict):
            lr_groups = {}
            for k in runner.optimizer.keys():
                _lr_group = [
                    self.get_lr(runner, _base_lr)
                    for _base_lr in self.base_lr[k]
                ]
                lr_groups.update({k: _lr_group})

            return lr_groups
        else:
            return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr]

    def get_warmup_lr(self, cur_iters):
        """Return the warmup lr at ``cur_iters`` for every param group."""

        def _get_warmup_lr(cur_iters, regular_lr):
            if self.warmup == 'constant':
                warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr]
            elif self.warmup == 'linear':
                # Linearly interpolate from warmup_ratio*lr up to lr.
                k = (1 - cur_iters / self.warmup_iters) * (1 -
                                                           self.warmup_ratio)
                warmup_lr = [_lr * (1 - k) for _lr in regular_lr]
            elif self.warmup == 'exp':
                # Exponentially interpolate from warmup_ratio*lr up to lr.
                k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
                warmup_lr = [_lr * k for _lr in regular_lr]
            return warmup_lr

        if isinstance(self.regular_lr, dict):
            lr_groups = {}
            for key, regular_lr in self.regular_lr.items():
                lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr)
            return lr_groups
        else:
            return _get_warmup_lr(cur_iters, self.regular_lr)

    def before_run(self, runner):
        # NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved,
        # it will be set according to the optimizer params
        if isinstance(runner.optimizer, dict):
            self.base_lr = {}
            for k, optim in runner.optimizer.items():
                for group in optim.param_groups:
                    group.setdefault('initial_lr', group['lr'])
                _base_lr = [
                    group['initial_lr'] for group in optim.param_groups
                ]
                self.base_lr.update({k: _base_lr})
        else:
            for group in runner.optimizer.param_groups:
                group.setdefault('initial_lr', group['lr'])
            self.base_lr = [
                group['initial_lr'] for group in runner.optimizer.param_groups
            ]

    def before_train_epoch(self, runner):
        if self.warmup_iters is None:
            # Convert epoch-based warmup length into iterations now that the
            # dataloader length is available.
            epoch_len = len(runner.data_loader)
            self.warmup_iters = self.warmup_epochs * epoch_len

        if not self.by_epoch:
            return

        self.regular_lr = self.get_regular_lr(runner)
        self._set_lr(runner, self.regular_lr)

    def before_train_iter(self, runner):
        cur_iter = runner.iter
        if not self.by_epoch:
            self.regular_lr = self.get_regular_lr(runner)
            if self.warmup is None or cur_iter >= self.warmup_iters:
                self._set_lr(runner, self.regular_lr)
            else:
                warmup_lr = self.get_warmup_lr(cur_iter)
                self._set_lr(runner, warmup_lr)
        elif self.by_epoch:
            # Under epoch-based scheduling, only the warmup phase needs
            # per-iteration updates.
            if self.warmup is None or cur_iter > self.warmup_iters:
                return
            elif cur_iter == self.warmup_iters:
                self._set_lr(runner, self.regular_lr)
            else:
                warmup_lr = self.get_warmup_lr(cur_iter)
                self._set_lr(runner, warmup_lr)
@HOOKS.register_module()
class FixedLrUpdaterHook(LrUpdaterHook):
    """Scheduler that keeps the learning rate fixed at its initial value."""

    def __init__(self, **kwargs):
        super(FixedLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        # No decay: the base (initial) lr is returned unchanged.
        return base_lr
@HOOKS.register_module()
class StepLrUpdaterHook(LrUpdaterHook):
    """Step LR scheduler with min_lr clipping.

    Args:
        step (int | list[int]): Step to decay the LR. If an int value is given,
            regard it as the decay interval. If a list is given, decay LR at
            these steps.
        gamma (float, optional): Decay LR ratio. Default: 0.1.
        min_lr (float, optional): Minimum LR value to keep. If LR after decay
            is lower than `min_lr`, it will be clipped to this value. If None
            is given, we don't perform lr clipping. Default: None.
    """

    def __init__(self, step, gamma=0.1, min_lr=None, **kwargs):
        if isinstance(step, list):
            assert mmcv.is_list_of(step, int)
            assert all([s > 0 for s in step])
        elif isinstance(step, int):
            assert step > 0
        else:
            raise TypeError('"step" must be a list or integer')
        self.step = step
        self.gamma = gamma
        self.min_lr = min_lr
        super(StepLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        # Progress is measured in epochs or iterations depending on by_epoch.
        progress = runner.epoch if self.by_epoch else runner.iter

        # calculate exponential term
        if isinstance(self.step, int):
            # Fixed decay interval: decay every `step` units of progress.
            exp = progress // self.step
        else:
            # Milestone list: exp is the number of milestones already passed.
            exp = len(self.step)
            for i, s in enumerate(self.step):
                if progress < s:
                    exp = i
                    break

        lr = base_lr * (self.gamma**exp)
        if self.min_lr is not None:
            # clip to a minimum value
            lr = max(lr, self.min_lr)
        return lr
@HOOKS.register_module()
class ExpLrUpdaterHook(LrUpdaterHook):
    """Exponential LR decay: lr = base_lr * gamma ** progress.

    Args:
        gamma (float): Multiplicative decay factor applied once per epoch
            (or iteration, depending on ``by_epoch``).
    """

    def __init__(self, gamma, **kwargs):
        self.gamma = gamma
        super(ExpLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        if self.by_epoch:
            elapsed = runner.epoch
        else:
            elapsed = runner.iter
        return base_lr * self.gamma**elapsed
@HOOKS.register_module()
class PolyLrUpdaterHook(LrUpdaterHook):
    """Polynomial LR decay towards ``min_lr``.

    Args:
        power (float): Exponent of the polynomial decay curve. Default: 1.0
            (linear decay).
        min_lr (float): Learning rate reached at the end of training.
            Default: 0.0.
    """

    def __init__(self, power=1., min_lr=0., **kwargs):
        self.power = power
        self.min_lr = min_lr
        super(PolyLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        if self.by_epoch:
            progress, max_progress = runner.epoch, runner.max_epochs
        else:
            progress, max_progress = runner.iter, runner.max_iters
        # Decay coefficient shrinks from 1 to 0 over the course of training.
        decay = (1 - progress / max_progress)**self.power
        return (base_lr - self.min_lr) * decay + self.min_lr
@HOOKS.register_module()
class InvLrUpdaterHook(LrUpdaterHook):
    """Inverse-time LR decay: lr = base_lr * (1 + gamma * progress) ** -power.

    Args:
        gamma (float): Scaling applied to the progress counter.
        power (float): Exponent of the inverse decay. Default: 1.0.
    """

    def __init__(self, gamma, power=1., **kwargs):
        self.gamma = gamma
        self.power = power
        super(InvLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        progress = runner.epoch if self.by_epoch else runner.iter
        scale = (1 + self.gamma * progress)**(-self.power)
        return base_lr * scale
@HOOKS.register_module()
class CosineAnnealingLrUpdaterHook(LrUpdaterHook):
    """Cosine-anneal the LR from its base value down to a target.

    Exactly one of ``min_lr`` (absolute target) or ``min_lr_ratio``
    (target as a fraction of the base LR) must be given.
    """

    def __init__(self, min_lr=None, min_lr_ratio=None, **kwargs):
        # Exactly one of the two targets may be supplied.
        assert (min_lr is None) ^ (min_lr_ratio is None)
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        super(CosineAnnealingLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        if self.by_epoch:
            progress, max_progress = runner.epoch, runner.max_epochs
        else:
            progress, max_progress = runner.iter, runner.max_iters

        if self.min_lr_ratio is not None:
            target_lr = base_lr * self.min_lr_ratio
        else:
            target_lr = self.min_lr
        return annealing_cos(base_lr, target_lr, progress / max_progress)
@HOOKS.register_module()
class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook):
    """Flat + Cosine lr schedule.

    Keeps the LR flat for the first ``start_percent`` of training, then
    cosine-anneals it down to the minimum.
    Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501

    Args:
        start_percent (float): When to start annealing the learning rate
            after the percentage of the total training steps.
            The value should be in range [0, 1).
            Default: 0.75
        min_lr (float, optional): The minimum lr. Default: None.
        min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
            Either `min_lr` or `min_lr_ratio` should be specified.
            Default: None.
    """

    def __init__(self,
                 start_percent=0.75,
                 min_lr=None,
                 min_lr_ratio=None,
                 **kwargs):
        assert (min_lr is None) ^ (min_lr_ratio is None)
        if start_percent < 0 or start_percent > 1 or not isinstance(
                start_percent, float):
            raise ValueError(
                'expected float between 0 and 1 start_percent, but '
                f'got {start_percent}')
        self.start_percent = start_percent
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        super(FlatCosineAnnealingLrUpdaterHook, self).__init__(**kwargs)

    def get_lr(self, runner, base_lr):
        if self.by_epoch:
            total, current = runner.max_epochs, runner.epoch
        else:
            total, current = runner.max_iters, runner.iter
        # First step of the annealing phase.
        start = round(total * self.start_percent)
        progress = current - start
        max_progress = total - start

        if self.min_lr_ratio is not None:
            target_lr = base_lr * self.min_lr_ratio
        else:
            target_lr = self.min_lr

        # Still inside the flat phase: keep the base LR untouched.
        if progress < 0:
            return base_lr
        return annealing_cos(base_lr, target_lr, progress / max_progress)
@HOOKS.register_module()
class CosineRestartLrUpdaterHook(LrUpdaterHook):
    """Cosine annealing with restarts learning rate scheme.

    Args:
        periods (list[int]): Periods for each cosine anneling cycle.
        restart_weights (list[float] | tuple[float], optional): Restart
            weights at each restart iteration. Default: (1, ).
        min_lr (float, optional): The minimum lr. Default: None.
        min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
            Either `min_lr` or `min_lr_ratio` should be specified.
            Default: None.
    """

    def __init__(self,
                 periods,
                 restart_weights=(1, ),
                 min_lr=None,
                 min_lr_ratio=None,
                 **kwargs):
        # Exactly one of min_lr / min_lr_ratio must be given.
        assert (min_lr is None) ^ (min_lr_ratio is None)
        self.periods = periods
        self.min_lr = min_lr
        self.min_lr_ratio = min_lr_ratio
        # FIX: the default used to be the mutable list ``[1]``; a tuple
        # avoids the shared-mutable-default pitfall while remaining
        # backward compatible (len() and indexing behave identically).
        self.restart_weights = restart_weights
        assert (len(self.periods) == len(self.restart_weights)
                ), 'periods and restart_weights should have the same length.'
        super(CosineRestartLrUpdaterHook, self).__init__(**kwargs)

        # cumulative_periods[i] is the progress value at which cycle i ends.
        self.cumulative_periods = [
            sum(self.periods[0:i + 1]) for i in range(0, len(self.periods))
        ]

    def get_lr(self, runner, base_lr):
        if self.by_epoch:
            progress = runner.epoch
        else:
            progress = runner.iter

        if self.min_lr_ratio is not None:
            target_lr = base_lr * self.min_lr_ratio
        else:
            target_lr = self.min_lr

        idx = get_position_from_periods(progress, self.cumulative_periods)
        current_weight = self.restart_weights[idx]
        # Progress value at which the current cycle started.
        nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1]
        current_periods = self.periods[idx]

        # Fraction of the current cycle elapsed, clamped to 1.
        alpha = min((progress - nearest_restart) / current_periods, 1)
        return annealing_cos(base_lr, target_lr, alpha, current_weight)
def get_position_from_periods(iteration, cumulative_periods):
    """Get the position from a period list.

    It will return the index of the right-closest number in the period list.
    For example, the cumulative_periods = [100, 200, 300, 400],
    if iteration == 50, return 0;
    if iteration == 210, return 2;
    if iteration == 300, return 3.

    Args:
        iteration (int): Current iteration.
        cumulative_periods (list[int]): Cumulative period list.

    Returns:
        int: The position of the right-closest number in the period list.

    Raises:
        ValueError: If ``iteration`` is beyond the last cumulative period.
    """
    for idx, boundary in enumerate(cumulative_periods):
        if iteration < boundary:
            return idx

    raise ValueError(f'Current iteration {iteration} exceeds '
                     f'cumulative_periods {cumulative_periods}')
@HOOKS.register_module()
class CyclicLrUpdaterHook(LrUpdaterHook):
    """Cyclic LR Scheduler.

    Implement the cyclical learning rate policy (CLR) described in
    https://arxiv.org/pdf/1506.01186.pdf

    Different from the original paper, we use cosine annealing rather than
    triangular policy inside a cycle. This improves the performance in the
    3D detection area.

    Args:
        by_epoch (bool): Whether to update LR by epoch.
        target_ratio (tuple[float]): Relative ratio of the highest LR and the
            lowest LR to the initial LR.
        cyclic_times (int): Number of cycles during training
        step_ratio_up (float): The ratio of the increasing process of LR in
            the total cycle.
        anneal_strategy (str): {'cos', 'linear'}
            Specifies the annealing strategy: 'cos' for cosine annealing,
            'linear' for linear annealing. Default: 'cos'.
    """

    def __init__(self,
                 by_epoch=False,
                 target_ratio=(10, 1e-4),
                 cyclic_times=1,
                 step_ratio_up=0.4,
                 anneal_strategy='cos',
                 **kwargs):
        # Normalize target_ratio to a (peak_ratio, floor_ratio) pair; a bare
        # float or a 1-tuple gets a floor of ratio / 1e5.
        if isinstance(target_ratio, float):
            target_ratio = (target_ratio, target_ratio / 1e5)
        elif isinstance(target_ratio, tuple):
            target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \
                if len(target_ratio) == 1 else target_ratio
        else:
            raise ValueError('target_ratio should be either float '
                             f'or tuple, got {type(target_ratio)}')

        assert len(target_ratio) == 2, \
            '"target_ratio" must be list or tuple of two floats'
        assert 0 <= step_ratio_up < 1.0, \
            '"step_ratio_up" must be in range [0,1)'

        self.target_ratio = target_ratio
        self.cyclic_times = cyclic_times
        self.step_ratio_up = step_ratio_up
        # Filled in before_run; each entry is
        # [start_iter, end_iter, cycle_length, start_ratio, end_ratio].
        self.lr_phases = []  # init lr_phases
        # validate anneal_strategy
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must be one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        elif anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        elif anneal_strategy == 'linear':
            self.anneal_func = annealing_linear

        assert not by_epoch, \
            'currently only support "by_epoch" = False'
        super(CyclicLrUpdaterHook, self).__init__(by_epoch, **kwargs)

    def before_run(self, runner):
        super(CyclicLrUpdaterHook, self).before_run(runner)
        # initiate lr_phases
        # total lr_phases are separated as up and down
        max_iter_per_phase = runner.max_iters // self.cyclic_times
        iter_up_phase = int(self.step_ratio_up * max_iter_per_phase)
        # Rising phase: ratio 1 -> target_ratio[0] over the first part of the
        # cycle; falling phase: target_ratio[0] -> target_ratio[1] afterwards.
        self.lr_phases.append(
            [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]])
        self.lr_phases.append([
            iter_up_phase, max_iter_per_phase, max_iter_per_phase,
            self.target_ratio[0], self.target_ratio[1]
        ])

    def get_lr(self, runner, base_lr):
        curr_iter = runner.iter
        for (start_iter, end_iter, max_iter_per_phase, start_ratio,
             end_ratio) in self.lr_phases:
            # Wrap the global iter into the current cycle.
            curr_iter %= max_iter_per_phase
            if start_iter <= curr_iter < end_iter:
                progress = curr_iter - start_iter
                return self.anneal_func(base_lr * start_ratio,
                                        base_lr * end_ratio,
                                        progress / (end_iter - start_iter))
        # NOTE(review): if no phase matches (e.g. iter_up_phase == 0 edge
        # cases), this implicitly returns None — confirm phases always cover
        # the full cycle.
@HOOKS.register_module()
class OneCycleLrUpdaterHook(LrUpdaterHook):
    """One Cycle LR Scheduler.

    The 1cycle learning rate policy changes the learning rate after every
    batch. The one cycle learning rate policy is described in
    https://arxiv.org/pdf/1708.07120.pdf

    Args:
        max_lr (float or list): Upper learning rate boundaries in the cycle
            for each parameter group.
        total_steps (int, optional): The total number of steps in the cycle.
            Note that if a value is not provided here, it will be the max_iter
            of runner. Default: None.
        pct_start (float): The percentage of the cycle (in number of steps)
            spent increasing the learning rate.
            Default: 0.3
        anneal_strategy (str): {'cos', 'linear'}
            Specifies the annealing strategy: 'cos' for cosine annealing,
            'linear' for linear annealing.
            Default: 'cos'
        div_factor (float): Determines the initial learning rate via
            initial_lr = max_lr/div_factor
            Default: 25
        final_div_factor (float): Determines the minimum learning rate via
            min_lr = initial_lr/final_div_factor
            Default: 1e4
        three_phase (bool): If three_phase is True, use a third phase of the
            schedule to annihilate the learning rate according to
            final_div_factor instead of modifying the second phase (the first
            two phases will be symmetrical about the step indicated by
            pct_start).
            Default: False
    """

    def __init__(self,
                 max_lr,
                 total_steps=None,
                 pct_start=0.3,
                 anneal_strategy='cos',
                 div_factor=25,
                 final_div_factor=1e4,
                 three_phase=False,
                 **kwargs):
        # validate by_epoch, currently only support by_epoch = False
        if 'by_epoch' not in kwargs:
            kwargs['by_epoch'] = False
        else:
            assert not kwargs['by_epoch'], \
                'currently only support "by_epoch" = False'
        if not isinstance(max_lr, (numbers.Number, list, dict)):
            raise ValueError('the type of max_lr must be the one of list or '
                             f'dict, but got {type(max_lr)}')
        self._max_lr = max_lr
        if total_steps is not None:
            if not isinstance(total_steps, int):
                raise ValueError('the type of total_steps must be int, but'
                                 f'got {type(total_steps)}')
            # NOTE: the attribute is only set when total_steps is given;
            # before_run uses hasattr() to fall back to runner.max_iters.
            self.total_steps = total_steps
        # validate pct_start
        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
            raise ValueError('expected float between 0 and 1 pct_start, but '
                             f'got {pct_start}')
        self.pct_start = pct_start
        # validate anneal_strategy
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must be one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        elif anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        elif anneal_strategy == 'linear':
            self.anneal_func = annealing_linear
        self.div_factor = div_factor
        self.final_div_factor = final_div_factor
        self.three_phase = three_phase
        # Filled in before_run; each entry is [end_iter, start_lr_factor,
        # end_lr_factor] relative to the per-group base LR.
        self.lr_phases = []  # init lr_phases
        super(OneCycleLrUpdaterHook, self).__init__(**kwargs)

    def before_run(self, runner):
        # Fall back to the runner's schedule length when total_steps was
        # not given explicitly (see __init__).
        if hasattr(self, 'total_steps'):
            total_steps = self.total_steps
        else:
            total_steps = runner.max_iters
        if total_steps < runner.max_iters:
            raise ValueError(
                'The total steps must be greater than or equal to max '
                f'iterations {runner.max_iters} of runner, but total steps '
                f'is {total_steps}.')

        if isinstance(runner.optimizer, dict):
            # One base-LR list per named optimizer; initial_lr is
            # max_lr / div_factor for each param group.
            self.base_lr = {}
            for k, optim in runner.optimizer.items():
                _max_lr = format_param(k, optim, self._max_lr)
                self.base_lr[k] = [lr / self.div_factor for lr in _max_lr]
                for group, lr in zip(optim.param_groups, self.base_lr[k]):
                    group.setdefault('initial_lr', lr)
        else:
            k = type(runner.optimizer).__name__
            _max_lr = format_param(k, runner.optimizer, self._max_lr)
            self.base_lr = [lr / self.div_factor for lr in _max_lr]
            for group, lr in zip(runner.optimizer.param_groups, self.base_lr):
                group.setdefault('initial_lr', lr)

        if self.three_phase:
            # Up, symmetric down, then annihilation to the final LR.
            self.lr_phases.append(
                [float(self.pct_start * total_steps) - 1, 1, self.div_factor])
            self.lr_phases.append([
                float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1
            ])
            self.lr_phases.append(
                [total_steps - 1, 1, 1 / self.final_div_factor])
        else:
            # Up, then a single annealing phase down to the final LR.
            self.lr_phases.append(
                [float(self.pct_start * total_steps) - 1, 1, self.div_factor])
            self.lr_phases.append(
                [total_steps - 1, self.div_factor, 1 / self.final_div_factor])

    def get_lr(self, runner, base_lr):
        curr_iter = runner.iter
        start_iter = 0
        for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases):
            if curr_iter <= end_iter:
                # Fraction of the current phase that has elapsed.
                pct = (curr_iter - start_iter) / (end_iter - start_iter)
                lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr,
                                      pct)
                break
            start_iter = end_iter
        # NOTE(review): ``lr`` would be unbound if curr_iter exceeds every
        # phase's end_iter; before_run's validation appears to prevent this —
        # confirm.
        return lr
def annealing_cos(start, end, factor, weight=1):
    """Cosine-anneal between ``start`` and ``end``.

    The result moves from ``weight * start + (1 - weight) * end`` at
    ``factor == 0.0`` down to ``end`` at ``factor == 1.0`` along half a
    cosine wave.

    Args:
        start (float): The starting learning rate of the cosine annealing.
        end (float): The ending learing rate of the cosine annealing.
        factor (float): The coefficient of `pi` when calculating the current
            percentage. Range from 0.0 to 1.0.
        weight (float, optional): The combination factor of `start` and `end`
            when calculating the actual starting learning rate. Default to 1.
    """
    # cos(pi * factor) sweeps from 1 to -1, so this term sweeps 2 -> 0.
    half_wave = cos(pi * factor) + 1
    return end + 0.5 * weight * (start - end) * half_wave
def annealing_linear(start, end, factor):
    """Linearly interpolate between ``start`` and ``end``.

    Args:
        start (float): Value returned at ``factor == 0.0``.
        end (float): Value returned at ``factor == 1.0``.
        factor (float): Interpolation position, ranging from 0.0 to 1.0.
    """
    delta = end - start
    return start + delta * factor
def format_param(name, optim, param):
    """Normalize a hyper-parameter spec into one value per param group.

    Args:
        name (str): Key identifying the optimizer (used for lookups when
            ``param`` is a dict, and for error messages).
        optim: Optimizer whose ``param_groups`` determine the expected
            number of values.
        param (numbers.Number | list | tuple | dict): A scalar to broadcast,
            a sequence with one entry per param group, or a mapping from
            optimizer name to spec.

    Returns:
        list | tuple: One value per parameter group (or the dict entry).

    Raises:
        ValueError: If a sequence's length does not match the number of
            param groups.
        KeyError: If ``name`` is missing from a dict spec.
    """
    if isinstance(param, numbers.Number):
        # Broadcast a scalar across every parameter group.
        return [param] * len(optim.param_groups)
    if isinstance(param, (list, tuple)):
        # multi param groups
        if len(param) != len(optim.param_groups):
            raise ValueError(f'expected {len(optim.param_groups)} '
                             f'values for {name}, got {len(param)}')
        return param
    # multi optimizers
    if name not in param:
        raise KeyError(f'{name} is not found in {param.keys()}')
    return param[name]
lavis/common/annotator/uniformer/mmcv/runner/hooks/memory.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
.hook
import
HOOKS
,
Hook
@HOOKS.register_module()
class EmptyCacheHook(Hook):
    """Release cached CUDA memory at configurable points of the run.

    Args:
        before_epoch (bool): Empty the CUDA cache before each epoch.
            Default: False.
        after_epoch (bool): Empty the CUDA cache after each epoch.
            Default: True.
        after_iter (bool): Empty the CUDA cache after each iteration.
            Default: False.
    """

    def __init__(self, before_epoch=False, after_epoch=True, after_iter=False):
        self._before_epoch = before_epoch
        self._after_epoch = after_epoch
        self._after_iter = after_iter

    def before_epoch(self, runner):
        if self._before_epoch:
            torch.cuda.empty_cache()

    def after_epoch(self, runner):
        if self._after_epoch:
            torch.cuda.empty_cache()

    def after_iter(self, runner):
        if self._after_iter:
            torch.cuda.empty_cache()
lavis/common/annotator/uniformer/mmcv/runner/hooks/momentum_updater.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
annotator.uniformer.mmcv
as
mmcv
from
.hook
import
HOOKS
,
Hook
from
.lr_updater
import
annealing_cos
,
annealing_linear
,
format_param
class MomentumUpdaterHook(Hook):
    """Base hook that updates optimizer momentum during training.

    Subclasses implement :meth:`get_momentum`, mapping a base momentum to
    the momentum for the current progress. Param groups may carry either a
    ``momentum`` entry or Adam-style ``betas`` (beta1 is treated as the
    momentum); dict-of-optimizers runners are supported.

    Args:
        by_epoch (bool): Update momentum by epoch if True, otherwise by
            iteration. Default: True.
        warmup (str, optional): Warmup type: 'constant', 'linear' or 'exp'.
            None disables warmup. Default: None.
        warmup_iters (int): Number of iterations the warmup lasts.
            Default: 0.
        warmup_ratio (float): Ratio in (0, 1] used to derive the warmup
            momentum from the regular momentum. Default: 0.9.
    """

    def __init__(self,
                 by_epoch=True,
                 warmup=None,
                 warmup_iters=0,
                 warmup_ratio=0.9):
        # validate the "warmup" argument
        if warmup is not None:
            if warmup not in ['constant', 'linear', 'exp']:
                # FIX: the message used to omit 'exp' even though it is
                # accepted above.
                raise ValueError(
                    f'"{warmup}" is not a supported type for warming up, valid'
                    ' types are "constant", "linear" and "exp"')
            assert warmup_iters > 0, \
                '"warmup_iters" must be a positive integer'
            assert 0 < warmup_ratio <= 1.0, \
                '"warmup_momentum" must be in range (0,1]'

        self.by_epoch = by_epoch
        self.warmup = warmup
        self.warmup_iters = warmup_iters
        self.warmup_ratio = warmup_ratio

        self.base_momentum = []  # initial momentum for all param groups
        self.regular_momentum = []  # expected momentum if no warmup is done
        # Legacy alias: earlier revisions of this class stored the value
        # under ``regular_mom`` in some methods; keep both names in sync.
        self.regular_mom = self.regular_momentum

    def _set_momentum(self, runner, momentum_groups):
        """Write ``momentum_groups`` into the optimizer's param groups."""
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                for param_group, mom in zip(optim.param_groups,
                                            momentum_groups[k]):
                    if 'momentum' in param_group.keys():
                        param_group['momentum'] = mom
                    elif 'betas' in param_group.keys():
                        # Adam-style optimizers store momentum as beta1.
                        param_group['betas'] = (mom, param_group['betas'][1])
        else:
            for param_group, mom in zip(runner.optimizer.param_groups,
                                        momentum_groups):
                if 'momentum' in param_group.keys():
                    param_group['momentum'] = mom
                elif 'betas' in param_group.keys():
                    param_group['betas'] = (mom, param_group['betas'][1])

    def get_momentum(self, runner, base_momentum):
        """Momentum for the current progress; implemented by subclasses."""
        raise NotImplementedError

    def get_regular_momentum(self, runner):
        """Momentum values for the current progress, ignoring warmup."""
        if isinstance(runner.optimizer, dict):
            momentum_groups = {}
            for k in runner.optimizer.keys():
                _momentum_group = [
                    self.get_momentum(runner, _base_momentum)
                    for _base_momentum in self.base_momentum[k]
                ]
                momentum_groups.update({k: _momentum_group})
            return momentum_groups
        else:
            return [
                self.get_momentum(runner, _base_momentum)
                for _base_momentum in self.base_momentum
            ]

    def get_warmup_momentum(self, cur_iters):
        """Momentum values during the warmup phase at ``cur_iters``."""

        def _get_warmup_momentum(cur_iters, regular_momentum):
            # BUGFIX: the original implementation ignored the
            # ``regular_momentum`` argument and read the inconsistently
            # named attributes ``self.regular_momentum`` ('constant') and
            # ``self.regular_mom`` ('linear'/'exp'), which broke warmup for
            # dict-of-optimizers setups. Always use the dispatched argument.
            if self.warmup == 'constant':
                warmup_momentum = [
                    _momentum / self.warmup_ratio
                    for _momentum in regular_momentum
                ]
            elif self.warmup == 'linear':
                k = (1 - cur_iters / self.warmup_iters) * (
                    1 - self.warmup_ratio)
                warmup_momentum = [
                    _momentum / (1 - k) for _momentum in regular_momentum
                ]
            elif self.warmup == 'exp':
                k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
                warmup_momentum = [
                    _momentum / k for _momentum in regular_momentum
                ]
            return warmup_momentum

        if isinstance(self.regular_momentum, dict):
            momentum_groups = {}
            for key, regular_momentum in self.regular_momentum.items():
                momentum_groups[key] = _get_warmup_momentum(
                    cur_iters, regular_momentum)
            return momentum_groups
        else:
            return _get_warmup_momentum(cur_iters, self.regular_momentum)

    def before_run(self, runner):
        # NOTE: when resuming from a checkpoint,
        # if 'initial_momentum' is not saved,
        # it will be set according to the optimizer params
        if isinstance(runner.optimizer, dict):
            self.base_momentum = {}
            for k, optim in runner.optimizer.items():
                for group in optim.param_groups:
                    if 'momentum' in group.keys():
                        group.setdefault('initial_momentum', group['momentum'])
                    else:
                        group.setdefault('initial_momentum',
                                         group['betas'][0])
                _base_momentum = [
                    group['initial_momentum'] for group in optim.param_groups
                ]
                self.base_momentum.update({k: _base_momentum})
        else:
            for group in runner.optimizer.param_groups:
                if 'momentum' in group.keys():
                    group.setdefault('initial_momentum', group['momentum'])
                else:
                    group.setdefault('initial_momentum', group['betas'][0])
            self.base_momentum = [
                group['initial_momentum']
                for group in runner.optimizer.param_groups
            ]

    def before_train_epoch(self, runner):
        if not self.by_epoch:
            return
        # Keep both attribute spellings in sync (see __init__).
        self.regular_momentum = self.get_regular_momentum(runner)
        self.regular_mom = self.regular_momentum
        self._set_momentum(runner, self.regular_momentum)

    def before_train_iter(self, runner):
        cur_iter = runner.iter
        if not self.by_epoch:
            self.regular_momentum = self.get_regular_momentum(runner)
            self.regular_mom = self.regular_momentum
            if self.warmup is None or cur_iter >= self.warmup_iters:
                self._set_momentum(runner, self.regular_momentum)
            else:
                warmup_momentum = self.get_warmup_momentum(cur_iter)
                self._set_momentum(runner, warmup_momentum)
        elif self.by_epoch:
            if self.warmup is None or cur_iter > self.warmup_iters:
                return
            elif cur_iter == self.warmup_iters:
                self._set_momentum(runner, self.regular_momentum)
            else:
                warmup_momentum = self.get_warmup_momentum(cur_iter)
                self._set_momentum(runner, warmup_momentum)
@HOOKS.register_module()
class StepMomentumUpdaterHook(MomentumUpdaterHook):
    """Step momentum scheduler with min value clipping.

    Args:
        step (int | list[int]): Step to decay the momentum. If an int value is
            given, regard it as the decay interval. If a list is given, decay
            momentum at these steps.
        gamma (float, optional): Decay momentum ratio. Default: 0.5.
        min_momentum (float, optional): Minimum momentum value to keep. If
            momentum after decay is lower than this value, it will be clipped
            accordingly. If None is given, we don't perform lr clipping.
            Default: None.
    """

    def __init__(self, step, gamma=0.5, min_momentum=None, **kwargs):
        if isinstance(step, int):
            assert step > 0
        elif isinstance(step, list):
            assert mmcv.is_list_of(step, int)
            assert all(s > 0 for s in step)
        else:
            raise TypeError('"step" must be a list or integer')
        self.step = step
        self.gamma = gamma
        self.min_momentum = min_momentum
        super(StepMomentumUpdaterHook, self).__init__(**kwargs)

    def get_momentum(self, runner, base_momentum):
        progress = runner.epoch if self.by_epoch else runner.iter

        # Count how many decays have been applied by now.
        if isinstance(self.step, int):
            num_decays = progress // self.step
        else:
            num_decays = len(self.step)
            for idx, milestone in enumerate(self.step):
                if progress < milestone:
                    num_decays = idx
                    break

        decayed = base_momentum * (self.gamma**num_decays)
        if self.min_momentum is not None:
            # clip to a minimum value
            decayed = max(decayed, self.min_momentum)
        return decayed
@HOOKS.register_module()
class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook):
    """Cosine-anneal the momentum from its base value down to a target.

    Exactly one of ``min_momentum`` (absolute target) or
    ``min_momentum_ratio`` (target as a fraction of the base momentum)
    must be given.
    """

    def __init__(self, min_momentum=None, min_momentum_ratio=None, **kwargs):
        # Exactly one of the two targets may be supplied.
        assert (min_momentum is None) ^ (min_momentum_ratio is None)
        self.min_momentum = min_momentum
        self.min_momentum_ratio = min_momentum_ratio
        super(CosineAnnealingMomentumUpdaterHook, self).__init__(**kwargs)

    def get_momentum(self, runner, base_momentum):
        if self.by_epoch:
            progress, max_progress = runner.epoch, runner.max_epochs
        else:
            progress, max_progress = runner.iter, runner.max_iters

        if self.min_momentum_ratio is not None:
            target_momentum = base_momentum * self.min_momentum_ratio
        else:
            target_momentum = self.min_momentum
        return annealing_cos(base_momentum, target_momentum,
                             progress / max_progress)
@HOOKS.register_module()
class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
    """Cyclic momentum Scheduler.

    Implement the cyclical momentum scheduler policy described in
    https://arxiv.org/pdf/1708.07120.pdf

    This momentum scheduler usually used together with the CyclicLRUpdater
    to improve the performance in the 3D detection area.

    Attributes:
        target_ratio (tuple[float]): Relative ratio of the lowest momentum and
            the highest momentum to the initial momentum.
        cyclic_times (int): Number of cycles during training
        step_ratio_up (float): The ratio of the increasing process of momentum
            in the total cycle.
        by_epoch (bool): Whether to update momentum by epoch.
    """

    def __init__(self,
                 by_epoch=False,
                 target_ratio=(0.85 / 0.95, 1),
                 cyclic_times=1,
                 step_ratio_up=0.4,
                 **kwargs):
        # Normalize target_ratio to a 2-tuple; a bare float or a 1-tuple
        # gets a second entry of ratio / 1e5.
        if isinstance(target_ratio, float):
            target_ratio = (target_ratio, target_ratio / 1e5)
        elif isinstance(target_ratio, tuple):
            target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \
                if len(target_ratio) == 1 else target_ratio
        else:
            raise ValueError('target_ratio should be either float '
                             f'or tuple, got {type(target_ratio)}')

        assert len(target_ratio) == 2, \
            '"target_ratio" must be list or tuple of two floats'
        assert 0 <= step_ratio_up < 1.0, \
            '"step_ratio_up" must be in range [0,1)'

        self.target_ratio = target_ratio
        self.cyclic_times = cyclic_times
        self.step_ratio_up = step_ratio_up
        # Filled in before_run; each entry is
        # [start_iter, end_iter, cycle_length, start_ratio, end_ratio].
        self.momentum_phases = []  # init momentum_phases
        # currently only support by_epoch=False
        assert not by_epoch, \
            'currently only support "by_epoch" = False'
        super(CyclicMomentumUpdaterHook, self).__init__(by_epoch, **kwargs)

    def before_run(self, runner):
        super(CyclicMomentumUpdaterHook, self).before_run(runner)
        # initiate momentum_phases
        # total momentum_phases are separated as up and down
        max_iter_per_phase = runner.max_iters // self.cyclic_times
        iter_up_phase = int(self.step_ratio_up * max_iter_per_phase)
        self.momentum_phases.append(
            [0, iter_up_phase, max_iter_per_phase, 1, self.target_ratio[0]])
        self.momentum_phases.append([
            iter_up_phase, max_iter_per_phase, max_iter_per_phase,
            self.target_ratio[0], self.target_ratio[1]
        ])

    def get_momentum(self, runner, base_momentum):
        curr_iter = runner.iter
        for (start_iter, end_iter, max_iter_per_phase, start_ratio,
             end_ratio) in self.momentum_phases:
            # Wrap the global iter into the current cycle.
            curr_iter %= max_iter_per_phase
            if start_iter <= curr_iter < end_iter:
                progress = curr_iter - start_iter
                return annealing_cos(base_momentum * start_ratio,
                                     base_momentum * end_ratio,
                                     progress / (end_iter - start_iter))
        # NOTE(review): if no phase matches, this implicitly returns None —
        # confirm the two phases always cover the full cycle.
@HOOKS.register_module()
class OneCycleMomentumUpdaterHook(MomentumUpdaterHook):
    """OneCycle momentum Scheduler.

    This momentum scheduler usually used together with the OneCycleLrUpdater
    to improve the performance.

    Args:
        base_momentum (float or list): Lower momentum boundaries in the cycle
            for each parameter group. Note that momentum is cycled inversely
            to learning rate; at the peak of a cycle, momentum is
            'base_momentum' and learning rate is 'max_lr'.
            Default: 0.85
        max_momentum (float or list): Upper momentum boundaries in the cycle
            for each parameter group. Functionally,
            it defines the cycle amplitude (max_momentum - base_momentum).
            Note that momentum is cycled inversely
            to learning rate; at the start of a cycle, momentum is
            'max_momentum' and learning rate is 'base_lr'
            Default: 0.95
        pct_start (float): The percentage of the cycle (in number of steps)
            spent increasing the learning rate.
            Default: 0.3
        anneal_strategy (str): {'cos', 'linear'}
            Specifies the annealing strategy: 'cos' for cosine annealing,
            'linear' for linear annealing.
            Default: 'cos'
        three_phase (bool): If three_phase is True, use a third phase of the
            schedule to annihilate the learning rate according to
            final_div_factor instead of modifying the second phase (the first
            two phases will be symmetrical about the step indicated by
            pct_start).
            Default: False
    """

    def __init__(self,
                 base_momentum=0.85,
                 max_momentum=0.95,
                 pct_start=0.3,
                 anneal_strategy='cos',
                 three_phase=False,
                 **kwargs):
        # validate by_epoch, currently only support by_epoch=False
        if 'by_epoch' not in kwargs:
            kwargs['by_epoch'] = False
        else:
            assert not kwargs['by_epoch'], \
                'currently only support "by_epoch" = False'
        if not isinstance(base_momentum, (float, list, dict)):
            raise ValueError('base_momentum must be the type among of float,'
                             'list or dict.')
        self._base_momentum = base_momentum
        if not isinstance(max_momentum, (float, list, dict)):
            raise ValueError('max_momentum must be the type among of float,'
                             'list or dict.')
        self._max_momentum = max_momentum
        # validate pct_start
        if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
            raise ValueError('Expected float between 0 and 1 pct_start, but '
                             f'got {pct_start}')
        self.pct_start = pct_start
        # validate anneal_strategy
        if anneal_strategy not in ['cos', 'linear']:
            raise ValueError('anneal_strategy must by one of "cos" or '
                             f'"linear", instead got {anneal_strategy}')
        elif anneal_strategy == 'cos':
            self.anneal_func = annealing_cos
        elif anneal_strategy == 'linear':
            self.anneal_func = annealing_linear
        self.three_phase = three_phase
        # Filled in before_run; each phase is a dict with 'end_iter' and the
        # param-group keys naming the start/end momentum values.
        self.momentum_phases = []  # init momentum_phases
        super(OneCycleMomentumUpdaterHook, self).__init__(**kwargs)

    def before_run(self, runner):
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                # Each optimizer must expose either 'momentum' or 'betas'.
                if ('momentum' not in optim.defaults
                        and 'betas' not in optim.defaults):
                    raise ValueError('optimizer must support momentum with'
                                     'option enabled')
                self.use_beta1 = 'betas' in optim.defaults
                _base_momentum = format_param(k, optim, self._base_momentum)
                _max_momentum = format_param(k, optim, self._max_momentum)
                for group, b_momentum, m_momentum in zip(
                        optim.param_groups, _base_momentum, _max_momentum):
                    if self.use_beta1:
                        # Adam-style optimizers store momentum as beta1.
                        _, beta2 = group['betas']
                        group['betas'] = (m_momentum, beta2)
                    else:
                        group['momentum'] = m_momentum
                    # Stash the cycle boundaries on each param group so
                    # get_momentum can look them up by phase key.
                    group['base_momentum'] = b_momentum
                    group['max_momentum'] = m_momentum
        else:
            optim = runner.optimizer
            if ('momentum' not in optim.defaults
                    and 'betas' not in optim.defaults):
                raise ValueError('optimizer must support momentum with'
                                 'option enabled')
            self.use_beta1 = 'betas' in optim.defaults
            k = type(optim).__name__
            _base_momentum = format_param(k, optim, self._base_momentum)
            _max_momentum = format_param(k, optim, self._max_momentum)
            for group, b_momentum, m_momentum in zip(
                    optim.param_groups, _base_momentum, _max_momentum):
                if self.use_beta1:
                    _, beta2 = group['betas']
                    group['betas'] = (m_momentum, beta2)
                else:
                    group['momentum'] = m_momentum
                group['base_momentum'] = b_momentum
                group['max_momentum'] = m_momentum

        if self.three_phase:
            # Down, symmetric up, then hold at max_momentum (momentum is
            # cycled inversely to the learning rate).
            self.momentum_phases.append({
                'end_iter':
                float(self.pct_start * runner.max_iters) - 1,
                'start_momentum':
                'max_momentum',
                'end_momentum':
                'base_momentum'
            })
            self.momentum_phases.append({
                'end_iter':
                float(2 * self.pct_start * runner.max_iters) - 2,
                'start_momentum':
                'base_momentum',
                'end_momentum':
                'max_momentum'
            })
            self.momentum_phases.append({
                'end_iter': runner.max_iters - 1,
                'start_momentum': 'max_momentum',
                'end_momentum': 'max_momentum'
            })
        else:
            self.momentum_phases.append({
                'end_iter':
                float(self.pct_start * runner.max_iters) - 1,
                'start_momentum':
                'max_momentum',
                'end_momentum':
                'base_momentum'
            })
            self.momentum_phases.append({
                'end_iter': runner.max_iters - 1,
                'start_momentum': 'base_momentum',
                'end_momentum': 'max_momentum'
            })

    def _set_momentum(self, runner, momentum_groups):
        """Write ``momentum_groups`` into the optimizer's param groups."""
        if isinstance(runner.optimizer, dict):
            for k, optim in runner.optimizer.items():
                for param_group, mom in zip(optim.param_groups,
                                            momentum_groups[k]):
                    if 'momentum' in param_group.keys():
                        param_group['momentum'] = mom
                    elif 'betas' in param_group.keys():
                        param_group['betas'] = (mom, param_group['betas'][1])
        else:
            for param_group, mom in zip(runner.optimizer.param_groups,
                                        momentum_groups):
                if 'momentum' in param_group.keys():
                    param_group['momentum'] = mom
                elif 'betas' in param_group.keys():
                    param_group['betas'] = (mom, param_group['betas'][1])

    def get_momentum(self, runner, param_group):
        # NOTE: unlike the base class, ``param_group`` here is the whole
        # param-group dict (holding 'base_momentum'/'max_momentum'), not a
        # bare momentum value.
        curr_iter = runner.iter
        start_iter = 0
        for i, phase in enumerate(self.momentum_phases):
            end_iter = phase['end_iter']
            # The last phase also catches iterations past its end_iter.
            if curr_iter <= end_iter or i == len(self.momentum_phases) - 1:
                pct = (curr_iter - start_iter) / (end_iter - start_iter)
                momentum = self.anneal_func(
                    param_group[phase['start_momentum']],
                    param_group[phase['end_momentum']], pct)
                break
            start_iter = end_iter
        return momentum

    def get_regular_momentum(self, runner):
        if isinstance(runner.optimizer, dict):
            momentum_groups = {}
            for k, optim in runner.optimizer.items():
                _momentum_group = [
                    self.get_momentum(runner, param_group)
                    for param_group in optim.param_groups
                ]
                momentum_groups.update({k: _momentum_group})
            return momentum_groups
        else:
            momentum_groups = []
            for param_group in runner.optimizer.param_groups:
                momentum_groups.append(self.get_momentum(runner, param_group))
            return momentum_groups
lavis/common/annotator/uniformer/mmcv/runner/hooks/optimizer.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
from
collections
import
defaultdict
from
itertools
import
chain
from
torch.nn.utils
import
clip_grad
from
annotator.uniformer.mmcv.utils
import
TORCH_VERSION
,
_BatchNorm
,
digit_version
from
..dist_utils
import
allreduce_grads
from
..fp16_utils
import
LossScaler
,
wrap_fp16_model
from
.hook
import
HOOKS
,
Hook
try
:
# If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported
# and used; otherwise, auto fp16 will adopt mmcv's implementation.
from
torch.cuda.amp
import
GradScaler
except
ImportError
:
pass
@HOOKS.register_module()
class OptimizerHook(Hook):
    """Plain optimizer hook: zero grads, backward, optionally clip, step.

    Args:
        grad_clip (dict, optional): Keyword arguments forwarded to
            ``torch.nn.utils.clip_grad.clip_grad_norm_`` (e.g. ``max_norm``).
            ``None`` disables gradient clipping.
    """

    def __init__(self, grad_clip=None):
        self.grad_clip = grad_clip

    def clip_grads(self, params):
        """Clip gradients of trainable params; return the total norm.

        Returns ``None`` when no parameter has a gradient to clip.
        """
        trainable = [
            p for p in params if p.requires_grad and p.grad is not None
        ]
        if trainable:
            return clip_grad.clip_grad_norm_(trainable, **self.grad_clip)

    def after_train_iter(self, runner):
        """Run one optimization step for the iteration that just finished."""
        optimizer = runner.optimizer
        optimizer.zero_grad()
        runner.outputs['loss'].backward()
        if self.grad_clip is not None:
            grad_norm = self.clip_grads(runner.model.parameters())
            if grad_norm is not None:
                # Add grad norm to the logger
                runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                         runner.outputs['num_samples'])
        optimizer.step()
@HOOKS.register_module()
class GradientCumulativeOptimizerHook(OptimizerHook):
    """Optimizer Hook implements multi-iters gradient cumulating.

    Args:
        cumulative_iters (int, optional): Num of gradient cumulative iters.
            The optimizer will step every `cumulative_iters` iters.
            Defaults to 1.

    Examples:
        >>> # Use cumulative_iters to simulate a large batch size
        >>> # It is helpful when the hardware cannot handle a large batch size.
        >>> loader = DataLoader(data, batch_size=64)
        >>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4)
        >>> # almost equals to
        >>> loader = DataLoader(data, batch_size=256)
        >>> optim_hook = OptimizerHook()
    """

    def __init__(self, cumulative_iters=1, **kwargs):
        # ``kwargs`` (e.g. ``grad_clip``) are handled by OptimizerHook.
        super(GradientCumulativeOptimizerHook, self).__init__(**kwargs)

        assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \
            f'cumulative_iters only accepts positive int, but got ' \
            f'{type(cumulative_iters)} instead.'

        self.cumulative_iters = cumulative_iters
        # Iteration bookkeeping; the real values are computed lazily in
        # ``_init`` on the first call to ``after_train_iter`` (when
        # ``runner.max_iters`` / ``runner.iter`` are available).
        self.divisible_iters = 0
        self.remainder_iters = 0
        self.initialized = False

    def has_batch_norm(self, module):
        # Recursively check whether any submodule is a BatchNorm layer;
        # running statistics interact badly with gradient accumulation.
        if isinstance(module, _BatchNorm):
            return True
        for m in module.children():
            if self.has_batch_norm(m):
                return True
        return False

    def _init(self, runner):
        # Lazily compute how many of the remaining iters form full
        # accumulation windows (``divisible_iters``) and how many are
        # left over at the end (``remainder_iters``).
        if runner.iter % self.cumulative_iters != 0:
            runner.logger.warning(
                'Resume iter number is not divisible by cumulative_iters in '
                'GradientCumulativeOptimizerHook, which means the gradient of '
                'some iters is lost and the result may be influenced slightly.'
            )

        if self.has_batch_norm(runner.model) and self.cumulative_iters > 1:
            runner.logger.warning(
                'GradientCumulativeOptimizerHook may slightly decrease '
                'performance if the model has BatchNorm layers.')

        residual_iters = runner.max_iters - runner.iter

        self.divisible_iters = (
            residual_iters // self.cumulative_iters * self.cumulative_iters)
        self.remainder_iters = residual_iters - self.divisible_iters

        self.initialized = True

    def after_train_iter(self, runner):
        # Accumulate a scaled loss each iter; only step/zero the optimizer
        # at the end of each accumulation window (or on the last iter).
        if not self.initialized:
            self._init(runner)

        # Scale the loss so the accumulated gradient matches a single
        # large-batch step; the trailing partial window uses its own size.
        if runner.iter < self.divisible_iters:
            loss_factor = self.cumulative_iters
        else:
            loss_factor = self.remainder_iters
        loss = runner.outputs['loss']
        loss = loss / loss_factor
        loss.backward()

        if (self.every_n_iters(runner, self.cumulative_iters)
                or self.is_last_iter(runner)):

            if self.grad_clip is not None:
                grad_norm = self.clip_grads(runner.model.parameters())
                if grad_norm is not None:
                    # Add grad norm to the logger
                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                             runner.outputs['num_samples'])
            runner.optimizer.step()
            runner.optimizer.zero_grad()
# Two implementations of the fp16 hooks are provided: one backed by
# ``torch.cuda.amp`` (PyTorch >= 1.6) and a fallback backed by mmcv's own
# ``LossScaler`` for older PyTorch / parrots.  The same class names are
# registered in both branches so callers are version-agnostic.
if (TORCH_VERSION != 'parrots'
        and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):

    @HOOKS.register_module()
    class Fp16OptimizerHook(OptimizerHook):
        """FP16 optimizer hook (using PyTorch's implementation).

        If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
        to take care of the optimization procedure.

        Args:
            loss_scale (float | str | dict): Scale factor configuration.
                If loss_scale is a float, static loss scaling will be used with
                the specified scale. If loss_scale is a string, it must be
                'dynamic', then dynamic loss scaling will be used.
                It can also be a dict containing arguments of GradScalar.
                Defaults to 512. For Pytorch >= 1.6, mmcv uses official
                implementation of GradScaler. If you use a dict version of
                loss_scale to create GradScaler, please refer to:
                https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler
                for the parameters.

        Examples:
            >>> loss_scale = dict(
            ...     init_scale=65536.0,
            ...     growth_factor=2.0,
            ...     backoff_factor=0.5,
            ...     growth_interval=2000
            ... )
            >>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale)
        """

        def __init__(self,
                     grad_clip=None,
                     coalesce=True,
                     bucket_size_mb=-1,
                     loss_scale=512.,
                     distributed=True):
            self.grad_clip = grad_clip
            self.coalesce = coalesce
            self.bucket_size_mb = bucket_size_mb
            self.distributed = distributed
            # ``new_scale`` argument passed to ``GradScaler.update``; only
            # set for static (float) scaling, left ``None`` for dynamic.
            self._scale_update_param = None
            if loss_scale == 'dynamic':
                self.loss_scaler = GradScaler()
            elif isinstance(loss_scale, float):
                self._scale_update_param = loss_scale
                self.loss_scaler = GradScaler(init_scale=loss_scale)
            elif isinstance(loss_scale, dict):
                self.loss_scaler = GradScaler(**loss_scale)
            else:
                raise ValueError('loss_scale must be of type float, dict, or '
                                 f'"dynamic", got {loss_scale}')

        def before_run(self, runner):
            """Preparing steps before Mixed Precision Training."""
            # wrap model mode to fp16
            wrap_fp16_model(runner.model)
            # resume from state dict
            if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']:
                scaler_state_dict = runner.meta['fp16']['loss_scaler']
                self.loss_scaler.load_state_dict(scaler_state_dict)

        def copy_grads_to_fp32(self, fp16_net, fp32_weights):
            """Copy gradients from fp16 model to fp32 weight copy."""
            # NOTE: relies on ``fp32_weights`` being ordered the same as
            # ``fp16_net.parameters()``.
            for fp32_param, fp16_param in zip(fp32_weights,
                                              fp16_net.parameters()):
                if fp16_param.grad is not None:
                    if fp32_param.grad is None:
                        fp32_param.grad = fp32_param.data.new(
                            fp32_param.size())
                    fp32_param.grad.copy_(fp16_param.grad)

        def copy_params_to_fp16(self, fp16_net, fp32_weights):
            """Copy updated params from fp32 weight copy to fp16 model."""
            for fp16_param, fp32_param in zip(fp16_net.parameters(),
                                              fp32_weights):
                fp16_param.data.copy_(fp32_param.data)

        def after_train_iter(self, runner):
            """Backward optimization steps for Mixed Precision Training. For
            dynamic loss scaling, please refer to
            https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler.

            1. Scale the loss by a scale factor.
            2. Backward the loss to obtain the gradients.
            3. Unscale the optimizer’s gradient tensors.
            4. Call optimizer.step() and update scale factor.
            5. Save loss_scaler state_dict for resume purpose.
            """
            # clear grads of last iteration
            runner.model.zero_grad()
            runner.optimizer.zero_grad()

            self.loss_scaler.scale(runner.outputs['loss']).backward()
            self.loss_scaler.unscale_(runner.optimizer)
            # grad clip
            if self.grad_clip is not None:
                grad_norm = self.clip_grads(runner.model.parameters())
                if grad_norm is not None:
                    # Add grad norm to the logger
                    runner.log_buffer.update({'grad_norm': float(grad_norm)},
                                             runner.outputs['num_samples'])
            # backward and update scaler
            self.loss_scaler.step(runner.optimizer)
            self.loss_scaler.update(self._scale_update_param)

            # save state_dict of loss_scaler
            runner.meta.setdefault(
                'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

    @HOOKS.register_module()
    class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
                                              Fp16OptimizerHook):
        """Fp16 optimizer Hook (using PyTorch's implementation) implements
        multi-iters gradient cumulating.

        If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
        to take care of the optimization procedure.
        """

        def __init__(self, *args, **kwargs):
            super(GradientCumulativeFp16OptimizerHook,
                  self).__init__(*args, **kwargs)

        def after_train_iter(self, runner):
            # Same accumulation-window logic as the parent class, but the
            # scaled loss goes through ``GradScaler`` and grads are only
            # cleared after a real optimizer step.
            if not self.initialized:
                self._init(runner)

            if runner.iter < self.divisible_iters:
                loss_factor = self.cumulative_iters
            else:
                loss_factor = self.remainder_iters
            loss = runner.outputs['loss']
            loss = loss / loss_factor

            self.loss_scaler.scale(loss).backward()

            if (self.every_n_iters(runner, self.cumulative_iters)
                    or self.is_last_iter(runner)):

                # copy fp16 grads in the model to fp32 params in the optimizer
                self.loss_scaler.unscale_(runner.optimizer)

                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(runner.model.parameters())
                    if grad_norm is not None:
                        # Add grad norm to the logger
                        runner.log_buffer.update(
                            {'grad_norm': float(grad_norm)},
                            runner.outputs['num_samples'])

                # backward and update scaler
                self.loss_scaler.step(runner.optimizer)
                self.loss_scaler.update(self._scale_update_param)

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()

else:

    @HOOKS.register_module()
    class Fp16OptimizerHook(OptimizerHook):
        """FP16 optimizer hook (mmcv's implementation).

        The steps of fp16 optimizer is as follows.
        1. Scale the loss value.
        2. BP in the fp16 model.
        2. Copy gradients from fp16 model to fp32 weights.
        3. Update fp32 weights.
        4. Copy updated parameters from fp32 weights to fp16 model.

        Refer to https://arxiv.org/abs/1710.03740 for more details.

        Args:
            loss_scale (float | str | dict): Scale factor configuration.
                If loss_scale is a float, static loss scaling will be used with
                the specified scale. If loss_scale is a string, it must be
                'dynamic', then dynamic loss scaling will be used.
                It can also be a dict containing arguments of LossScaler.
                Defaults to 512.
        """

        def __init__(self,
                     grad_clip=None,
                     coalesce=True,
                     bucket_size_mb=-1,
                     loss_scale=512.,
                     distributed=True):
            self.grad_clip = grad_clip
            self.coalesce = coalesce
            self.bucket_size_mb = bucket_size_mb
            self.distributed = distributed
            if loss_scale == 'dynamic':
                self.loss_scaler = LossScaler(mode='dynamic')
            elif isinstance(loss_scale, float):
                self.loss_scaler = LossScaler(
                    init_scale=loss_scale, mode='static')
            elif isinstance(loss_scale, dict):
                self.loss_scaler = LossScaler(**loss_scale)
            else:
                raise ValueError('loss_scale must be of type float, dict, or '
                                 f'"dynamic", got {loss_scale}')

        def before_run(self, runner):
            """Preparing steps before Mixed Precision Training.

            1. Make a master copy of fp32 weights for optimization.
            2. Convert the main model from fp32 to fp16.
            """
            # keep a copy of fp32 weights
            old_groups = runner.optimizer.param_groups
            runner.optimizer.param_groups = copy.deepcopy(
                runner.optimizer.param_groups)
            state = defaultdict(dict)
            # Re-key the optimizer state from the old (fp16) params onto the
            # freshly deep-copied fp32 master params, position by position.
            p_map = {
                old_p: p
                for old_p, p in zip(
                    chain(*(g['params'] for g in old_groups)),
                    chain(*(g['params']
                            for g in runner.optimizer.param_groups)))
            }
            for k, v in runner.optimizer.state.items():
                state[p_map[k]] = v
            runner.optimizer.state = state
            # convert model to fp16
            wrap_fp16_model(runner.model)
            # resume from state dict
            if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']:
                scaler_state_dict = runner.meta['fp16']['loss_scaler']
                self.loss_scaler.load_state_dict(scaler_state_dict)

        def copy_grads_to_fp32(self, fp16_net, fp32_weights):
            """Copy gradients from fp16 model to fp32 weight copy."""
            for fp32_param, fp16_param in zip(fp32_weights,
                                              fp16_net.parameters()):
                if fp16_param.grad is not None:
                    if fp32_param.grad is None:
                        fp32_param.grad = fp32_param.data.new(
                            fp32_param.size())
                    fp32_param.grad.copy_(fp16_param.grad)

        def copy_params_to_fp16(self, fp16_net, fp32_weights):
            """Copy updated params from fp32 weight copy to fp16 model."""
            for fp16_param, fp32_param in zip(fp16_net.parameters(),
                                              fp32_weights):
                fp16_param.data.copy_(fp32_param.data)

        def after_train_iter(self, runner):
            """Backward optimization steps for Mixed Precision Training. For
            dynamic loss scaling, please refer `loss_scalar.py`

            1. Scale the loss by a scale factor.
            2. Backward the loss to obtain the gradients (fp16).
            3. Copy gradients from the model to the fp32 weight copy.
            4. Scale the gradients back and update the fp32 weight copy.
            5. Copy back the params from fp32 weight copy to the fp16 model.
            6. Save loss_scaler state_dict for resume purpose.
            """
            # clear grads of last iteration
            runner.model.zero_grad()
            runner.optimizer.zero_grad()
            # scale the loss value
            scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale
            scaled_loss.backward()
            # copy fp16 grads in the model to fp32 params in the optimizer

            fp32_weights = []
            for param_group in runner.optimizer.param_groups:
                fp32_weights += param_group['params']
            self.copy_grads_to_fp32(runner.model, fp32_weights)
            # allreduce grads
            if self.distributed:
                allreduce_grads(fp32_weights, self.coalesce,
                                self.bucket_size_mb)

            has_overflow = self.loss_scaler.has_overflow(fp32_weights)
            # if has overflow, skip this iteration
            if not has_overflow:
                # scale the gradients back
                for param in fp32_weights:
                    if param.grad is not None:
                        param.grad.div_(self.loss_scaler.loss_scale)
                if self.grad_clip is not None:
                    grad_norm = self.clip_grads(fp32_weights)
                    if grad_norm is not None:
                        # Add grad norm to the logger
                        runner.log_buffer.update(
                            {'grad_norm': float(grad_norm)},
                            runner.outputs['num_samples'])
                # update fp32 params
                runner.optimizer.step()
                # copy fp32 params to the fp16 model
                self.copy_params_to_fp16(runner.model, fp32_weights)
            self.loss_scaler.update_scale(has_overflow)
            if has_overflow:
                runner.logger.warning('Check overflow, downscale loss scale '
                                      f'to {self.loss_scaler.cur_scale}')

            # save state_dict of loss_scaler
            runner.meta.setdefault(
                'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

    @HOOKS.register_module()
    class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
                                              Fp16OptimizerHook):
        """Fp16 optimizer Hook (using mmcv implementation) implements multi-
        iters gradient cumulating."""

        def __init__(self, *args, **kwargs):
            super(GradientCumulativeFp16OptimizerHook,
                  self).__init__(*args, **kwargs)

        def after_train_iter(self, runner):
            # Accumulation-window variant of the mmcv fp16 step above: the
            # fp32 copy/step/copy-back sequence only runs at window ends.
            if not self.initialized:
                self._init(runner)

            if runner.iter < self.divisible_iters:
                loss_factor = self.cumulative_iters
            else:
                loss_factor = self.remainder_iters

            loss = runner.outputs['loss']
            loss = loss / loss_factor

            # scale the loss value
            scaled_loss = loss * self.loss_scaler.loss_scale
            scaled_loss.backward()

            if (self.every_n_iters(runner, self.cumulative_iters)
                    or self.is_last_iter(runner)):

                # copy fp16 grads in the model to fp32 params in the optimizer
                fp32_weights = []
                for param_group in runner.optimizer.param_groups:
                    fp32_weights += param_group['params']
                self.copy_grads_to_fp32(runner.model, fp32_weights)
                # allreduce grads
                if self.distributed:
                    allreduce_grads(fp32_weights, self.coalesce,
                                    self.bucket_size_mb)

                has_overflow = self.loss_scaler.has_overflow(fp32_weights)
                # if has overflow, skip this iteration
                if not has_overflow:
                    # scale the gradients back
                    for param in fp32_weights:
                        if param.grad is not None:
                            param.grad.div_(self.loss_scaler.loss_scale)
                    if self.grad_clip is not None:
                        grad_norm = self.clip_grads(fp32_weights)
                        if grad_norm is not None:
                            # Add grad norm to the logger
                            runner.log_buffer.update(
                                {'grad_norm': float(grad_norm)},
                                runner.outputs['num_samples'])
                    # update fp32 params
                    runner.optimizer.step()
                    # copy fp32 params to the fp16 model
                    self.copy_params_to_fp16(runner.model, fp32_weights)
                else:
                    runner.logger.warning(
                        'Check overflow, downscale loss scale '
                        f'to {self.loss_scaler.cur_scale}')

                self.loss_scaler.update_scale(has_overflow)

                # save state_dict of loss_scaler
                runner.meta.setdefault(
                    'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()

                # clear grads
                runner.model.zero_grad()
                runner.optimizer.zero_grad()
lavis/common/annotator/uniformer/mmcv/runner/hooks/profiler.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
from
typing
import
Callable
,
List
,
Optional
,
Union
import
torch
from
..dist_utils
import
master_only
from
.hook
import
HOOKS
,
Hook
@HOOKS.register_module()
class ProfilerHook(Hook):
    """Profiler to analyze performance during training.

    PyTorch Profiler is a tool that allows the collection of the performance
    metrics during the training. More details on Profiler can be found at
    https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile

    Args:
        by_epoch (bool): Profile performance by epoch or by iteration.
            Default: True.
        profile_iters (int): Number of iterations for profiling.
            If ``by_epoch=True``, profile_iters indicates that they are the
            first profile_iters epochs at the beginning of the
            training, otherwise it indicates the first profile_iters
            iterations. Default: 1.
        activities (list[str]): List of activity groups (CPU, CUDA) to use in
            profiling. Default: ['cpu', 'cuda'].
        schedule (dict, optional): Config of generating the callable schedule.
            if schedule is None, profiler will not add step markers into the
            trace and table view. Default: None.
        on_trace_ready (callable, dict): Either a handler or a dict of generate
            handler. Default: None.
        record_shapes (bool): Save information about operator's input shapes.
            Default: False.
        profile_memory (bool): Track tensor memory allocation/deallocation.
            Default: False.
        with_stack (bool): Record source information (file and line number)
            for the ops. Default: False.
        with_flops (bool): Use formula to estimate the FLOPS of specific
            operators (matrix multiplication and 2D convolution).
            Default: False.
        json_trace_path (str, optional): Exports the collected trace in Chrome
            JSON format. Default: None.

    Example:
        >>> runner = ... # instantiate a Runner
        >>> # tensorboard trace
        >>> trace_config = dict(type='tb_trace', dir_name='work_dir')
        >>> profiler_config = dict(on_trace_ready=trace_config)
        >>> runner.register_profiler_hook(profiler_config)
        >>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)])
    """

    def __init__(self,
                 by_epoch: bool = True,
                 profile_iters: int = 1,
                 # NOTE(review): mutable default argument; safe here since
                 # it is only read, never mutated, but worth cleaning up.
                 activities: List[str] = ['cpu', 'cuda'],
                 schedule: Optional[dict] = None,
                 on_trace_ready: Optional[Union[Callable, dict]] = None,
                 record_shapes: bool = False,
                 profile_memory: bool = False,
                 with_stack: bool = False,
                 with_flops: bool = False,
                 json_trace_path: Optional[str] = None) -> None:
        try:
            from torch import profiler  # torch version >= 1.8.1
        except ImportError:
            raise ImportError('profiler is the new feature of torch1.8.1, '
                              f'but your version is {torch.__version__}')

        assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
        self.by_epoch = by_epoch

        if profile_iters < 1:
            raise ValueError('profile_iters should be greater than 0, but got '
                             f'{profile_iters}')
        self.profile_iters = profile_iters

        if not isinstance(activities, list):
            raise ValueError(
                f'activities should be list, but got {type(activities)}')
        # Translate the string names into ``torch.profiler`` enum values.
        self.activities = []
        for activity in activities:
            activity = activity.lower()
            if activity == 'cpu':
                self.activities.append(profiler.ProfilerActivity.CPU)
            elif activity == 'cuda':
                self.activities.append(profiler.ProfilerActivity.CUDA)
            else:
                raise ValueError(
                    f'activity should be "cpu" or "cuda", but got {activity}')

        if schedule is not None:
            self.schedule = profiler.schedule(**schedule)
        else:
            self.schedule = None

        self.on_trace_ready = on_trace_ready
        self.record_shapes = record_shapes
        self.profile_memory = profile_memory
        self.with_stack = with_stack
        self.with_flops = with_flops
        self.json_trace_path = json_trace_path

    @master_only
    def before_run(self, runner):
        # Validate the profiling horizon against the runner's schedule.
        if self.by_epoch and runner.max_epochs < self.profile_iters:
            raise ValueError('self.profile_iters should not be greater than '
                             f'{runner.max_epochs}')

        if not self.by_epoch and runner.max_iters < self.profile_iters:
            raise ValueError('self.profile_iters should not be greater than '
                             f'{runner.max_iters}')

        # Resolve ``on_trace_ready`` into a callable handler: it may be a
        # ready-made callable, a config dict ('log_trace' / 'tb_trace'),
        # or None.
        if callable(self.on_trace_ready):  # handler
            _on_trace_ready = self.on_trace_ready
        elif isinstance(self.on_trace_ready, dict):  # config of handler
            trace_cfg = self.on_trace_ready.copy()
            trace_type = trace_cfg.pop('type')  # log_trace handler
            if trace_type == 'log_trace':

                def _log_handler(prof):
                    print(prof.key_averages().table(**trace_cfg))

                _on_trace_ready = _log_handler
            elif trace_type == 'tb_trace':  # tensorboard_trace handler
                try:
                    import torch_tb_profiler  # noqa: F401
                except ImportError:
                    raise ImportError('please run "pip install '
                                      'torch-tb-profiler" to install '
                                      'torch_tb_profiler')
                _on_trace_ready = torch.profiler.tensorboard_trace_handler(
                    **trace_cfg)
            else:
                raise ValueError('trace_type should be "log_trace" or '
                                 f'"tb_trace", but got {trace_type}')
        elif self.on_trace_ready is None:
            _on_trace_ready = None  # type: ignore
        else:
            raise ValueError('on_trace_ready should be handler, dict or None, '
                             f'but got {type(self.on_trace_ready)}')

        if runner.max_epochs > 1:
            warnings.warn(f'profiler will profile {runner.max_epochs} epochs '
                          'instead of 1 epoch. Since profiler will slow down '
                          'the training, it is recommended to train 1 epoch '
                          'with ProfilerHook and adjust your setting according'
                          ' to the profiler summary. During normal training '
                          '(epoch > 1), you may disable the ProfilerHook.')

        self.profiler = torch.profiler.profile(
            activities=self.activities,
            schedule=self.schedule,
            on_trace_ready=_on_trace_ready,
            record_shapes=self.record_shapes,
            profile_memory=self.profile_memory,
            with_stack=self.with_stack,
            with_flops=self.with_flops)

        # Enter the profiler context manually; it is exited in the
        # matching after_train_epoch / after_train_iter callback.
        self.profiler.__enter__()
        runner.logger.info('profiler is profiling...')

    @master_only
    def after_train_epoch(self, runner):
        # Stop profiling after the configured number of epochs.
        if self.by_epoch and runner.epoch == self.profile_iters - 1:
            runner.logger.info('profiler may take a few minutes...')
            self.profiler.__exit__(None, None, None)
            if self.json_trace_path is not None:
                self.profiler.export_chrome_trace(self.json_trace_path)

    @master_only
    def after_train_iter(self, runner):
        self.profiler.step()
        # Stop profiling after the configured number of iterations.
        if not self.by_epoch and runner.iter == self.profile_iters - 1:
            runner.logger.info('profiler may take a few minutes...')
            self.profiler.__exit__(None, None, None)
            if self.json_trace_path is not None:
                self.profiler.export_chrome_trace(self.json_trace_path)
lavis/common/annotator/uniformer/mmcv/runner/hooks/sampler_seed.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
.hook
import
HOOKS
,
Hook
@HOOKS.register_module()
class DistSamplerSeedHook(Hook):
    """Data-loading sampler for distributed training.

    When distributed training, it is only useful in conjunction with
    :obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same
    purpose with :obj:`IterLoader`.
    """

    def before_epoch(self, runner):
        loader = runner.data_loader
        epoch = runner.epoch
        if hasattr(loader.sampler, 'set_epoch'):
            # in case the data loader uses `SequentialSampler` in Pytorch
            loader.sampler.set_epoch(epoch)
        elif hasattr(loader.batch_sampler.sampler, 'set_epoch'):
            # batch sampler in pytorch warps the sampler as its attributes.
            loader.batch_sampler.sampler.set_epoch(epoch)
lavis/common/annotator/uniformer/mmcv/runner/hooks/sync_buffer.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
..dist_utils
import
allreduce_params
from
.hook
import
HOOKS
,
Hook
@HOOKS.register_module()
class SyncBuffersHook(Hook):
    """Synchronize model buffers such as running_mean and running_var in BN at
    the end of each epoch.

    Args:
        distributed (bool): Whether distributed training is used. It is
            effective only for distributed training. Defaults to True.
    """

    def __init__(self, distributed=True):
        self.distributed = distributed

    def after_epoch(self, runner):
        """All-reduce model buffers at the end of each epoch."""
        if not self.distributed:
            return
        allreduce_params(runner.model.buffers())
lavis/common/annotator/uniformer/mmcv/runner/iter_based_runner.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
os.path
as
osp
import
platform
import
shutil
import
time
import
warnings
import
torch
from
torch.optim
import
Optimizer
import
annotator.uniformer.mmcv
as
mmcv
from
.base_runner
import
BaseRunner
from
.builder
import
RUNNERS
from
.checkpoint
import
save_checkpoint
from
.hooks
import
IterTimerHook
from
.utils
import
get_host_info
class IterLoader:
    """Endlessly iterable wrapper around a DataLoader.

    Restarts the underlying loader when it is exhausted, bumping an epoch
    counter and (if supported) informing the loader's sampler of the new
    epoch so that shuffling differs between passes.
    """

    def __init__(self, dataloader):
        self._dataloader = dataloader
        self.iter_loader = iter(self._dataloader)
        self._epoch = 0

    @property
    def epoch(self):
        """Number of completed passes over the underlying dataloader."""
        return self._epoch

    def __next__(self):
        try:
            return next(self.iter_loader)
        except StopIteration:
            # The current pass is exhausted: start a new epoch.
            self._epoch += 1
            if hasattr(self._dataloader.sampler, 'set_epoch'):
                self._dataloader.sampler.set_epoch(self._epoch)
            # Prevent possible deadlock during epoch transition
            time.sleep(2)
            self.iter_loader = iter(self._dataloader)
            return next(self.iter_loader)

    def __len__(self):
        return len(self._dataloader)
@RUNNERS.register_module()
class IterBasedRunner(BaseRunner):
    """Iteration-based Runner.

    This runner train models iteration by iteration.
    """

    def train(self, data_loader, **kwargs):
        """Run one training iteration drawn from ``data_loader``."""
        self.model.train()
        self.mode = 'train'
        self.data_loader = data_loader
        # Mirror the loader's epoch counter (see IterLoader).
        self._epoch = data_loader.epoch
        data_batch = next(data_loader)
        self.call_hook('before_train_iter')
        outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
        if not isinstance(outputs, dict):
            raise TypeError('model.train_step() must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
        self.outputs = outputs
        self.call_hook('after_train_iter')
        self._inner_iter += 1
        self._iter += 1

    @torch.no_grad()
    def val(self, data_loader, **kwargs):
        """Run one validation iteration drawn from ``data_loader``."""
        self.model.eval()
        self.mode = 'val'
        self.data_loader = data_loader
        data_batch = next(data_loader)
        self.call_hook('before_val_iter')
        outputs = self.model.val_step(data_batch, **kwargs)
        if not isinstance(outputs, dict):
            raise TypeError('model.val_step() must return a dict')
        if 'log_vars' in outputs:
            self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
        self.outputs = outputs
        self.call_hook('after_val_iter')
        # Note: validation advances only the inner iter, not ``_iter``.
        self._inner_iter += 1

    def run(self, data_loaders, workflow, max_iters=None, **kwargs):
        """Start running.

        Args:
            data_loaders (list[:obj:`DataLoader`]): Dataloaders for training
                and validation.
            workflow (list[tuple]): A list of (phase, iters) to specify the
                running order and iterations. E.g, [('train', 10000),
                ('val', 1000)] means running 10000 iterations for training and
                1000 iterations for validation, iteratively.
        """
        assert isinstance(data_loaders, list)
        assert mmcv.is_list_of(workflow, tuple)
        assert len(data_loaders) == len(workflow)
        if max_iters is not None:
            warnings.warn(
                'setting max_iters in run is deprecated, '
                'please set max_iters in runner_config', DeprecationWarning)
            self._max_iters = max_iters
        assert self._max_iters is not None, (
            'max_iters must be specified during instantiation')

        work_dir = self.work_dir if self.work_dir is not None else 'NONE'
        self.logger.info('Start running, host: %s, work_dir: %s',
                         get_host_info(), work_dir)
        self.logger.info('Hooks will be executed in the following order:\n%s',
                         self.get_hook_info())
        self.logger.info('workflow: %s, max: %d iters', workflow,
                         self._max_iters)
        self.call_hook('before_run')

        iter_loaders = [IterLoader(x) for x in data_loaders]

        self.call_hook('before_epoch')

        # Cycle through the workflow phases until max_iters is reached.
        while self.iter < self._max_iters:
            for i, flow in enumerate(workflow):
                self._inner_iter = 0
                mode, iters = flow
                if not isinstance(mode, str) or not hasattr(self, mode):
                    raise ValueError(
                        'runner has no method named "{}" to run a workflow'.
                        format(mode))
                iter_runner = getattr(self, mode)
                for _ in range(iters):
                    if mode == 'train' and self.iter >= self._max_iters:
                        break
                    iter_runner(iter_loaders[i], **kwargs)

        time.sleep(1)  # wait for some hooks like loggers to finish
        self.call_hook('after_epoch')
        self.call_hook('after_run')

    def resume(self,
               checkpoint,
               resume_optimizer=True,
               map_location='default'):
        """Resume model from checkpoint.

        Args:
            checkpoint (str): Checkpoint to resume from.
            resume_optimizer (bool, optional): Whether resume the optimizer(s)
                if the checkpoint file includes optimizer(s). Default to True.
            map_location (str, optional): Same as :func:`torch.load`.
                Default to 'default'.
        """
        if map_location == 'default':
            # Map checkpoint tensors to this process's current CUDA device.
            device_id = torch.cuda.current_device()
            checkpoint = self.load_checkpoint(
                checkpoint,
                map_location=lambda storage, loc: storage.cuda(device_id))
        else:
            checkpoint = self.load_checkpoint(
                checkpoint, map_location=map_location)

        self._epoch = checkpoint['meta']['epoch']
        self._iter = checkpoint['meta']['iter']
        self._inner_iter = checkpoint['meta']['iter']
        if 'optimizer' in checkpoint and resume_optimizer:
            if isinstance(self.optimizer, Optimizer):
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            elif isinstance(self.optimizer, dict):
                for k in self.optimizer.keys():
                    self.optimizer[k].load_state_dict(
                        checkpoint['optimizer'][k])
            else:
                raise TypeError(
                    'Optimizer should be dict or torch.optim.Optimizer '
                    f'but got {type(self.optimizer)}')

        self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}')

    def save_checkpoint(self,
                        out_dir,
                        filename_tmpl='iter_{}.pth',
                        meta=None,
                        save_optimizer=True,
                        create_symlink=True):
        """Save checkpoint to file.

        Args:
            out_dir (str): Directory to save checkpoint files.
            filename_tmpl (str, optional): Checkpoint file template.
                Defaults to 'iter_{}.pth'.
            meta (dict, optional): Metadata to be saved in checkpoint.
                Defaults to None.
            save_optimizer (bool, optional): Whether save optimizer.
                Defaults to True.
            create_symlink (bool, optional): Whether create symlink to the
                latest checkpoint file. Defaults to True.
        """
        if meta is None:
            meta = {}
        elif not isinstance(meta, dict):
            raise TypeError(
                f'meta should be a dict or None, but got {type(meta)}')
        if self.meta is not None:
            meta.update(self.meta)
            # Note: meta.update(self.meta) should be done before
            # meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise
            # there will be problems with resumed checkpoints.
            # More details in https://github.com/open-mmlab/mmcv/pull/1108
        meta.update(epoch=self.epoch + 1, iter=self.iter)

        filename = filename_tmpl.format(self.iter + 1)
        filepath = osp.join(out_dir, filename)
        optimizer = self.optimizer if save_optimizer else None
        save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
        # in some environments, `os.symlink` is not supported, you may need to
        # set `create_symlink` to False
        if create_symlink:
            dst_file = osp.join(out_dir, 'latest.pth')
            if platform.system() != 'Windows':
                mmcv.symlink(filename, dst_file)
            else:
                shutil.copy(filepath, dst_file)

    def register_training_hooks(self,
                                lr_config,
                                optimizer_config=None,
                                checkpoint_config=None,
                                log_config=None,
                                momentum_config=None,
                                custom_hooks_config=None):
        """Register default hooks for iter-based training.

        Checkpoint hook, optimizer stepper hook and logger hooks will be set to
        `by_epoch=False` by default.

        Default hooks include:

        +----------------------+-------------------------+
        | Hooks                | Priority                |
        +======================+=========================+
        | LrUpdaterHook        | VERY_HIGH (10)          |
        +----------------------+-------------------------+
        | MomentumUpdaterHook  | HIGH (30)               |
        +----------------------+-------------------------+
        | OptimizerStepperHook | ABOVE_NORMAL (40)       |
        +----------------------+-------------------------+
        | CheckpointSaverHook  | NORMAL (50)             |
        +----------------------+-------------------------+
        | IterTimerHook        | LOW (70)                |
        +----------------------+-------------------------+
        | LoggerHook(s)        | VERY_LOW (90)           |
        +----------------------+-------------------------+
        | CustomHook(s)        | defaults to NORMAL (50) |
        +----------------------+-------------------------+

        If custom hooks have same priority with default hooks, custom hooks
        will be triggered after default hooks.
        """
        # Iter-based training: default every per-iteration config to
        # ``by_epoch=False`` before delegating to the base implementation.
        if checkpoint_config is not None:
            checkpoint_config.setdefault('by_epoch', False)
        if lr_config is not None:
            lr_config.setdefault('by_epoch', False)
        if log_config is not None:
            for info in log_config['hooks']:
                info.setdefault('by_epoch', False)
        super(IterBasedRunner, self).register_training_hooks(
            lr_config=lr_config,
            momentum_config=momentum_config,
            optimizer_config=optimizer_config,
            checkpoint_config=checkpoint_config,
            log_config=log_config,
            timer_config=IterTimerHook(),
            custom_hooks_config=custom_hooks_config)
lavis/common/annotator/uniformer/mmcv/runner/log_buffer.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
collections
import
OrderedDict
import
numpy
as
np
class LogBuffer:
    """Accumulate scalar log values and compute weighted running averages.

    Each call to :meth:`update` appends one value (with a sample count)
    per key; :meth:`average` then writes per-key weighted means to
    :attr:`output` and marks the buffer :attr:`ready`.
    """

    def __init__(self):
        # Per-key histories of raw values and of their sample counts;
        # the two dicts always hold the same keys, in insertion order.
        self.val_history = OrderedDict()
        self.n_history = OrderedDict()
        # Most recently computed averages; valid only while `ready` is True.
        self.output = OrderedDict()
        self.ready = False

    def clear(self):
        """Drop all recorded history and any computed output."""
        self.val_history.clear()
        self.n_history.clear()
        self.clear_output()

    def clear_output(self):
        """Invalidate the computed averages without touching the history."""
        self.output.clear()
        self.ready = False

    def update(self, vars, count=1):
        """Append every value in ``vars``, each weighted by ``count``."""
        assert isinstance(vars, dict)
        for key, var in vars.items():
            # Both histories stay key-synchronized.
            self.val_history.setdefault(key, []).append(var)
            self.n_history.setdefault(key, []).append(count)

    def average(self, n=0):
        """Average the latest ``n`` values per key (all values if ``n == 0``)."""
        assert n >= 0
        for key, history in self.val_history.items():
            # n == 0 -> [-0:] slices the full history.
            values = np.array(history[-n:])
            nums = np.array(self.n_history[key][-n:])
            self.output[key] = np.sum(values * nums) / np.sum(nums)
            self.ready = True
lavis/common/annotator/uniformer/mmcv/runner/optimizer/__init__.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
.builder
import
(
OPTIMIZER_BUILDERS
,
OPTIMIZERS
,
build_optimizer
,
build_optimizer_constructor
)
from
.default_constructor
import
DefaultOptimizerConstructor
__all__
=
[
'OPTIMIZER_BUILDERS'
,
'OPTIMIZERS'
,
'DefaultOptimizerConstructor'
,
'build_optimizer'
,
'build_optimizer_constructor'
]
lavis/common/annotator/uniformer/mmcv/runner/optimizer/builder.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
inspect
import
torch
from
...utils
import
Registry
,
build_from_cfg
# Registry of optimizer classes; populated below with every class found
# in `torch.optim` and extensible with custom optimizers.
OPTIMIZERS = Registry('optimizer')
# Registry of optimizer-constructor classes (e.g. DefaultOptimizerConstructor)
# that translate a config dict plus a model into an optimizer instance.
OPTIMIZER_BUILDERS = Registry('optimizer builder')
def register_torch_optimizers():
    """Register every optimizer class shipped with ``torch.optim``.

    Scans ``torch.optim`` and registers each subclass of
    ``torch.optim.Optimizer`` into the ``OPTIMIZERS`` registry.

    Returns:
        list[str]: Names of the registered optimizer classes.
    """
    registered = []
    for name in dir(torch.optim):
        if name.startswith('__'):
            continue
        candidate = getattr(torch.optim, name)
        is_optimizer_cls = (inspect.isclass(candidate)
                            and issubclass(candidate, torch.optim.Optimizer))
        if is_optimizer_cls:
            OPTIMIZERS.register_module()(candidate)
            registered.append(name)
    return registered
# Eagerly register all torch.optim optimizers at import time; keeps the
# list of registered names for reference.
TORCH_OPTIMIZERS = register_torch_optimizers()
def build_optimizer_constructor(cfg):
    """Build an optimizer constructor from its registry config.

    Args:
        cfg (dict): Config whose ``type`` key names a class registered in
            ``OPTIMIZER_BUILDERS``; remaining keys are passed to it.

    Returns:
        The instantiated optimizer-constructor object.
    """
    return build_from_cfg(cfg, OPTIMIZER_BUILDERS)
def build_optimizer(model, cfg):
    """Build an optimizer for ``model`` from a config dict.

    Args:
        model (nn.Module): The model whose parameters will be optimized.
        cfg (dict): Optimizer config. May contain ``constructor``
            (defaults to 'DefaultOptimizerConstructor') and
            ``paramwise_cfg``; remaining keys configure the optimizer.

    Returns:
        The constructed optimizer.
    """
    # Deep-copy so popping keys below never mutates the caller's config.
    cfg_copy = copy.deepcopy(cfg)
    ctor_type = cfg_copy.pop('constructor', 'DefaultOptimizerConstructor')
    paramwise = cfg_copy.pop('paramwise_cfg', None)
    ctor = build_optimizer_constructor(
        dict(
            type=ctor_type,
            optimizer_cfg=cfg_copy,
            paramwise_cfg=paramwise))
    return ctor(model)
lavis/common/annotator/uniformer/mmcv/runner/optimizer/default_constructor.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
import
warnings
import
torch
from
torch.nn
import
GroupNorm
,
LayerNorm
from
annotator.uniformer.mmcv.utils
import
_BatchNorm
,
_InstanceNorm
,
build_from_cfg
,
is_list_of
from
annotator.uniformer.mmcv.utils.ext_loader
import
check_ops_exist
from
.builder
import
OPTIMIZER_BUILDERS
,
OPTIMIZERS
@OPTIMIZER_BUILDERS.register_module()
class DefaultOptimizerConstructor:
    """Default constructor for optimizers.

    By default each parameter share the same optimizer settings, and we
    provide an argument ``paramwise_cfg`` to specify parameter-wise settings.
    It is a dict and may contain the following fields:

    - ``custom_keys`` (dict): Specified parameters-wise settings by keys. If
      one of the keys in ``custom_keys`` is a substring of the name of one
      parameter, then the setting of the parameter will be specified by
      ``custom_keys[key]`` and other setting like ``bias_lr_mult`` etc. will
      be ignored. It should be noted that the aforementioned ``key`` is the
      longest key that is a substring of the name of the parameter. If there
      are multiple matched keys with the same length, then the key with lower
      alphabet order will be chosen.
      ``custom_keys[key]`` should be a dict and may contain fields ``lr_mult``
      and ``decay_mult``. See Example 2 below.
    - ``bias_lr_mult`` (float): It will be multiplied to the learning
      rate for all bias parameters (except for those in normalization
      layers and offset layers of DCN).
    - ``bias_decay_mult`` (float): It will be multiplied to the weight
      decay for all bias parameters (except for those in
      normalization layers, depthwise conv layers, offset layers of DCN).
    - ``norm_decay_mult`` (float): It will be multiplied to the weight
      decay for all weight and bias parameters of normalization
      layers.
    - ``dwconv_decay_mult`` (float): It will be multiplied to the weight
      decay for all weight and bias parameters of depthwise conv
      layers.
    - ``dcn_offset_lr_mult`` (float): It will be multiplied to the learning
      rate for parameters of offset layer in the deformable convs
      of a model.
    - ``bypass_duplicate`` (bool): If true, the duplicate parameters
      would not be added into optimizer. Default: False.

    Note:
        1. If the option ``dcn_offset_lr_mult`` is used, the constructor will
           override the effect of ``bias_lr_mult`` in the bias of offset
           layer. So be careful when using both ``bias_lr_mult`` and
           ``dcn_offset_lr_mult``. If you wish to apply both of them to the
           offset layer in deformable convs, set ``dcn_offset_lr_mult``
           to the original ``dcn_offset_lr_mult`` * ``bias_lr_mult``.
        2. If the option ``dcn_offset_lr_mult`` is used, the constructor will
           apply it to all the DCN layers in the model. So be careful when
           the model contains multiple DCN layers in places other than
           backbone.

    Args:
        model (:obj:`nn.Module`): The model with parameters to be optimized.
        optimizer_cfg (dict): The config dict of the optimizer.
            Positional fields are

                - `type`: class name of the optimizer.

            Optional fields are

                - any arguments of the corresponding optimizer type, e.g.,
                  lr, weight_decay, momentum, etc.
        paramwise_cfg (dict, optional): Parameter-wise options.

    Example 1:
        >>> model = torch.nn.modules.Conv1d(1, 1, 1)
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
        >>>                      weight_decay=0.0001)
        >>> paramwise_cfg = dict(norm_decay_mult=0.)
        >>> optim_builder = DefaultOptimizerConstructor(
        >>>     optimizer_cfg, paramwise_cfg)
        >>> optimizer = optim_builder(model)

    Example 2:
        >>> # assume model have attribute model.backbone and model.cls_head
        >>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95)
        >>> paramwise_cfg = dict(custom_keys={
                '.backbone': dict(lr_mult=0.1, decay_mult=0.9)})
        >>> optim_builder = DefaultOptimizerConstructor(
        >>>     optimizer_cfg, paramwise_cfg)
        >>> optimizer = optim_builder(model)
        >>> # Then the `lr` and `weight_decay` for model.backbone is
        >>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for
        >>> # model.cls_head is (0.01, 0.95).
    """

    def __init__(self, optimizer_cfg, paramwise_cfg=None):
        if not isinstance(optimizer_cfg, dict):
            # BUGFIX: the message parts were previously passed as two
            # separate arguments to TypeError (a stray comma), so the
            # exception rendered as a tuple instead of one sentence.
            raise TypeError('optimizer_cfg should be a dict, '
                            f'but got {type(optimizer_cfg)}')
        self.optimizer_cfg = optimizer_cfg
        self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg
        self.base_lr = optimizer_cfg.get('lr', None)
        self.base_wd = optimizer_cfg.get('weight_decay', None)
        self._validate_cfg()

    def _validate_cfg(self):
        """Sanity-check ``paramwise_cfg``; raise early on invalid configs."""
        if not isinstance(self.paramwise_cfg, dict):
            raise TypeError('paramwise_cfg should be None or a dict, '
                            f'but got {type(self.paramwise_cfg)}')

        if 'custom_keys' in self.paramwise_cfg:
            if not isinstance(self.paramwise_cfg['custom_keys'], dict):
                raise TypeError(
                    'If specified, custom_keys must be a dict, '
                    f'but got {type(self.paramwise_cfg["custom_keys"])}')
            # Any decay multiplier needs a base weight decay to multiply.
            if self.base_wd is None:
                for key in self.paramwise_cfg['custom_keys']:
                    if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]:
                        raise ValueError('base_wd should not be None')

        # get base lr and weight decay
        # weight_decay must be explicitly specified if mult is specified
        if ('bias_decay_mult' in self.paramwise_cfg
                or 'norm_decay_mult' in self.paramwise_cfg
                or 'dwconv_decay_mult' in self.paramwise_cfg):
            if self.base_wd is None:
                raise ValueError('base_wd should not be None')

    def _is_in(self, param_group, param_group_list):
        """Return True if any param of ``param_group`` already appears in
        ``param_group_list`` (used to detect duplicate parameters)."""
        assert is_list_of(param_group_list, dict)
        param = set(param_group['params'])
        param_set = set()
        for group in param_group_list:
            param_set.update(set(group['params']))

        return not param.isdisjoint(param_set)

    def add_params(self, params, module, prefix='', is_dcn_module=None):
        """Add all parameters of module to the params list.

        The parameters of the given module will be added to the list of param
        groups, with specific rules defined by paramwise_cfg.

        Args:
            params (list[dict]): A list of param groups, it will be modified
                in place.
            module (nn.Module): The module to be added.
            prefix (str): The prefix of the module
            is_dcn_module (int|float|None): If the current module is a
                submodule of DCN, `is_dcn_module` will be passed to
                control conv_offset layer's learning rate. Defaults to None.
        """
        # get param-wise options
        custom_keys = self.paramwise_cfg.get('custom_keys', {})
        # first sort with alphabet order and then sort with reversed len of str
        # (longest key wins; ties broken by lower alphabetical order)
        sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True)

        bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.)
        bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.)
        norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.)
        dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.)
        bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False)
        dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.)

        # special rules for norm layers and depth-wise conv layers
        is_norm = isinstance(module,
                             (_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm))
        is_dwconv = (
            isinstance(module, torch.nn.Conv2d)
            and module.in_channels == module.groups)

        for name, param in module.named_parameters(recurse=False):
            param_group = {'params': [param]}
            if not param.requires_grad:
                # Frozen params are kept in the group list untouched.
                params.append(param_group)
                continue
            if bypass_duplicate and self._is_in(param_group, params):
                warnings.warn(f'{prefix} is duplicate. It is skipped since '
                              f'bypass_duplicate={bypass_duplicate}')
                continue
            # if the parameter match one of the custom keys, ignore other rules
            is_custom = False
            for key in sorted_keys:
                if key in f'{prefix}.{name}':
                    is_custom = True
                    lr_mult = custom_keys[key].get('lr_mult', 1.)
                    param_group['lr'] = self.base_lr * lr_mult
                    if self.base_wd is not None:
                        decay_mult = custom_keys[key].get('decay_mult', 1.)
                        param_group['weight_decay'] = self.base_wd * decay_mult
                    break

            if not is_custom:
                # bias_lr_mult affects all bias parameters
                # except for norm.bias dcn.conv_offset.bias
                if name == 'bias' and not (is_norm or is_dcn_module):
                    param_group['lr'] = self.base_lr * bias_lr_mult

                if (prefix.find('conv_offset') != -1 and is_dcn_module
                        and isinstance(module, torch.nn.Conv2d)):
                    # deal with both dcn_offset's bias & weight
                    param_group['lr'] = self.base_lr * dcn_offset_lr_mult

                # apply weight decay policies
                if self.base_wd is not None:
                    # norm decay
                    if is_norm:
                        param_group[
                            'weight_decay'] = self.base_wd * norm_decay_mult
                    # depth-wise conv
                    elif is_dwconv:
                        param_group[
                            'weight_decay'] = self.base_wd * dwconv_decay_mult
                    # bias lr and decay
                    elif name == 'bias' and not is_dcn_module:
                        # TODO: current bias_decay_mult will have affect on DCN
                        param_group[
                            'weight_decay'] = self.base_wd * bias_decay_mult
            params.append(param_group)

        if check_ops_exist():
            from annotator.uniformer.mmcv.ops import (DeformConv2d,
                                                      ModulatedDeformConv2d)
            is_dcn_module = isinstance(module,
                                       (DeformConv2d, ModulatedDeformConv2d))
        else:
            is_dcn_module = False
        for child_name, child_mod in module.named_children():
            child_prefix = f'{prefix}.{child_name}' if prefix else child_name
            self.add_params(
                params,
                child_mod,
                prefix=child_prefix,
                is_dcn_module=is_dcn_module)

    def __call__(self, model):
        # Unwrap (Distributed)DataParallel-style containers.
        if hasattr(model, 'module'):
            model = model.module

        optimizer_cfg = self.optimizer_cfg.copy()
        # if no paramwise option is specified, just use the global setting
        if not self.paramwise_cfg:
            optimizer_cfg['params'] = model.parameters()
            return build_from_cfg(optimizer_cfg, OPTIMIZERS)

        # set param-wise lr and weight decay recursively
        params = []
        self.add_params(params, model)
        optimizer_cfg['params'] = params

        return build_from_cfg(optimizer_cfg, OPTIMIZERS)
lavis/common/annotator/uniformer/mmcv/runner/priority.py
0 → 100644
View file @
c04f261a
# Copyright (c) OpenMMLab. All rights reserved.
from
enum
import
Enum
class Priority(Enum):
    """Hook priority levels.

    Lower numeric values run earlier. The defined levels are:

    +--------------+------------+
    | Level        | Value      |
    +==============+============+
    | HIGHEST      | 0          |
    +--------------+------------+
    | VERY_HIGH    | 10         |
    +--------------+------------+
    | HIGH         | 30         |
    +--------------+------------+
    | ABOVE_NORMAL | 40         |
    +--------------+------------+
    | NORMAL       | 50         |
    +--------------+------------+
    | BELOW_NORMAL | 60         |
    +--------------+------------+
    | LOW          | 70         |
    +--------------+------------+
    | VERY_LOW     | 90         |
    +--------------+------------+
    | LOWEST       | 100        |
    +--------------+------------+
    """

    HIGHEST = 0
    VERY_HIGH = 10
    HIGH = 30
    ABOVE_NORMAL = 40
    NORMAL = 50
    BELOW_NORMAL = 60
    LOW = 70
    VERY_LOW = 90
    LOWEST = 100
def get_priority(priority):
    """Normalize a priority given as an int, name string, or Priority member.

    Args:
        priority (int or str or :obj:`Priority`): Priority.

    Returns:
        int: The priority value.

    Raises:
        ValueError: If an int priority falls outside [0, 100].
        TypeError: If ``priority`` is none of the accepted types.
    """
    if isinstance(priority, int):
        if not 0 <= priority <= 100:
            raise ValueError('priority must be between 0 and 100')
        return priority
    if isinstance(priority, Priority):
        return priority.value
    if isinstance(priority, str):
        # Name lookup is case-insensitive via upper-casing.
        return Priority[priority.upper()].value
    raise TypeError('priority must be an integer or Priority enum value')
Prev
1
…
12
13
14
15
16
17
18
19
20
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment