Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dcnv3
Commits
41b18fd8
Commit
41b18fd8
authored
Jan 06, 2025
by
zhe chen
Browse files
Use pre-commit to reformat code
Use pre-commit to reformat code
parent
ff20ea39
Changes
390
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
256 additions
and
256 deletions
+256
-256
classification/dataset/zipreader.py
classification/dataset/zipreader.py
+4
-4
classification/ddp_hooks.py
classification/ddp_hooks.py
+1
-1
classification/ema_deepspeed.py
classification/ema_deepspeed.py
+5
-4
classification/export.py
classification/export.py
+11
-4
classification/extract_feature.py
classification/extract_feature.py
+6
-5
classification/logger.py
classification/logger.py
+3
-2
classification/lr_scheduler.py
classification/lr_scheduler.py
+2
-2
classification/main.py
classification/main.py
+61
-63
classification/main_accelerate.py
classification/main_accelerate.py
+34
-36
classification/main_deepspeed.py
classification/main_deepspeed.py
+65
-65
classification/meta_data/map22kto1k.txt
classification/meta_data/map22kto1k.txt
+1
-1
classification/models/__init__.py
classification/models/__init__.py
+1
-1
classification/models/build.py
classification/models/build.py
+1
-1
classification/models/intern_image.py
classification/models/intern_image.py
+22
-22
classification/ops_dcnv3/functions/dcnv3_func.py
classification/ops_dcnv3/functions/dcnv3_func.py
+4
-5
classification/ops_dcnv3/modules/__init__.py
classification/ops_dcnv3/modules/__init__.py
+1
-1
classification/ops_dcnv3/modules/dcnv3.py
classification/ops_dcnv3/modules/dcnv3.py
+12
-12
classification/ops_dcnv3/setup.py
classification/ops_dcnv3/setup.py
+20
-25
classification/ops_dcnv3/src/cuda/dcnv3_cuda.cu
classification/ops_dcnv3/src/cuda/dcnv3_cuda.cu
+1
-1
classification/ops_dcnv3/src/cuda/dcnv3_im2col_cuda.cuh
classification/ops_dcnv3/src/cuda/dcnv3_im2col_cuda.cuh
+1
-1
No files found.
classification/dataset/zipreader.py
View file @
41b18fd8
...
...
@@ -4,12 +4,12 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
io
import
os
import
zipfile
import
io
import
numpy
as
np
from
PIL
import
Image
from
PIL
import
ImageFile
from
PIL
import
Image
,
ImageFile
ImageFile
.
LOAD_TRUNCATED_IMAGES
=
True
...
...
@@ -96,7 +96,7 @@ class ZipReader(object):
try
:
im
=
Image
.
open
(
io
.
BytesIO
(
data
))
except
:
print
(
"
ERROR IMG LOADED:
"
,
path_img
)
print
(
'
ERROR IMG LOADED:
'
,
path_img
)
random_img
=
np
.
random
.
rand
(
224
,
224
,
3
)
*
255
im
=
Image
.
fromarray
(
np
.
uint8
(
random_img
))
return
im
classification/ddp_hooks.py
View file @
41b18fd8
...
...
@@ -11,7 +11,7 @@ import torch.distributed as dist
def
_allreduce_fut
(
process_group
:
dist
.
ProcessGroup
,
tensor
:
torch
.
Tensor
)
->
torch
.
futures
.
Future
[
torch
.
Tensor
]:
"
Averages the input gradient tensor by allreduce and returns a future.
"
'
Averages the input gradient tensor by allreduce and returns a future.
'
group_to_use
=
process_group
if
process_group
is
not
None
else
dist
.
group
.
WORLD
# Apply the division first to avoid overflow, especially for FP16.
...
...
classification/ema_deepspeed.py
View file @
41b18fd8
from
contextlib
import
contextmanager
import
deepspeed
import
torch
import
torch.nn
as
nn
import
deepspeed
from
deepspeed.runtime.zero
import
GatheredParameters
from
contextlib
import
contextmanager
class
EMADeepspeed
(
nn
.
Module
):
...
...
@@ -49,7 +50,7 @@ class EMADeepspeed(nn.Module):
shadow_params
[
sname
]
=
shadow_params
[
sname
].
type_as
(
m_param
[
key
])
shadow_params
[
sname
].
sub_
(
one_minus_decay
*
(
shadow_params
[
sname
]
-
m_param
[
key
]))
else
:
assert
not
key
in
self
.
m_name2s_name
assert
key
not
in
self
.
m_name2s_name
def
copy_to
(
self
,
model
):
shadow_params
=
dict
(
self
.
named_buffers
())
...
...
@@ -60,7 +61,7 @@ class EMADeepspeed(nn.Module):
if
m_param
[
key
].
requires_grad
:
m_param
[
key
].
data
.
copy_
(
shadow_params
[
self
.
m_name2s_name
[
key
]].
data
)
else
:
assert
not
key
in
self
.
m_name2s_name
assert
key
not
in
self
.
m_name2s_name
def
store
(
self
,
model
):
"""
...
...
classification/export.py
View file @
41b18fd8
...
...
@@ -4,15 +4,15 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
argparse
import
os
import
time
import
argparse
import
torch
from
tqdm
import
tqdm
from
config
import
get_config
from
models
import
build_model
from
tqdm
import
tqdm
def
get_args
():
parser
=
argparse
.
ArgumentParser
()
...
...
@@ -31,6 +31,7 @@ def get_args():
cfg
=
get_config
(
args
)
return
args
,
cfg
def
get_model
(
args
,
cfg
):
model
=
build_model
(
cfg
)
ckpt
=
torch
.
load
(
args
.
ckpt
,
map_location
=
'cpu'
)[
'model'
]
...
...
@@ -38,6 +39,7 @@ def get_model(args, cfg):
model
.
load_state_dict
(
ckpt
)
return
model
def
speed_test
(
model
,
input
):
# warmup
for
_
in
tqdm
(
range
(
100
)):
...
...
@@ -50,7 +52,8 @@ def speed_test(model, input):
_
=
model
(
input
)
end
=
time
.
time
()
th
=
100
/
(
end
-
start
)
print
(
f
"using time:
{
end
-
start
}
, throughput
{
th
}
"
)
print
(
f
'using time:
{
end
-
start
}
, throughput
{
th
}
'
)
def
torch2onnx
(
args
,
cfg
):
model
=
get_model
(
args
,
cfg
).
cuda
()
...
...
@@ -66,6 +69,7 @@ def torch2onnx(args, cfg):
return
model
def
onnx2trt
(
args
):
from
mmdeploy.backend.tensorrt
import
from_onnx
...
...
@@ -83,6 +87,7 @@ def onnx2trt(args):
max_workspace_size
=
2
**
30
,
)
def
check
(
args
,
cfg
):
from
mmdeploy.backend.tensorrt.wrapper
import
TRTWrapper
...
...
@@ -105,6 +110,7 @@ def check(args, cfg):
speed_test
(
model
,
x
)
speed_test
(
trt_model
,
dict
(
input
=
x
))
def
main
():
args
,
cfg
=
get_args
()
...
...
@@ -117,5 +123,6 @@ def main():
print
(
'onnx -> trt: success'
)
check
(
args
,
cfg
)
if
__name__
==
'__main__'
:
main
()
classification/extract_feature.py
View file @
41b18fd8
...
...
@@ -71,8 +71,8 @@ class IntermediateLayerGetter:
def
main
(
args
,
config
):
from
models
import
build_model
import
torchvision.transforms
as
T
from
models
import
build_model
from
PIL
import
Image
model
=
build_model
(
config
)
...
...
@@ -110,13 +110,14 @@ def main(args, config):
if
__name__
==
'__main__'
:
import
argparse
import
torch
from
config
import
get_config
parser
=
argparse
.
ArgumentParser
(
'Get Intermediate Layer Output'
)
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
"
FILE
"
,
help
=
'Path to config file'
)
parser
.
add_argument
(
'--img'
,
type
=
str
,
required
=
True
,
metavar
=
"
FILE
"
,
help
=
'Path to img file'
)
parser
.
add_argument
(
"
--keys
"
,
default
=
None
,
nargs
=
'+'
,
help
=
"The intermediate layer's keys you want to save."
)
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
'
FILE
'
,
help
=
'Path to config file'
)
parser
.
add_argument
(
'--img'
,
type
=
str
,
required
=
True
,
metavar
=
'
FILE
'
,
help
=
'Path to img file'
)
parser
.
add_argument
(
'
--keys
'
,
default
=
None
,
nargs
=
'+'
,
help
=
"The intermediate layer's keys you want to save."
)
parser
.
add_argument
(
'--resume'
,
help
=
'resume from checkpoint'
)
parser
.
add_argument
(
'--save'
,
action
=
'store_true'
,
help
=
'Save the results.'
)
args
=
parser
.
parse_args
()
...
...
@@ -125,4 +126,4 @@ if __name__ == '__main__':
mid_outputs
,
model_output
=
main
(
args
,
config
)
if
args
.
save
:
torch
.
save
(
mid_outputs
,
args
.
img
[:
-
3
]
+
'.pth'
)
\ No newline at end of file
torch
.
save
(
mid_outputs
,
args
.
img
[:
-
3
]
+
'.pth'
)
classification/logger.py
View file @
41b18fd8
...
...
@@ -4,10 +4,11 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
functools
import
logging
import
os
import
sys
import
logging
import
functools
from
termcolor
import
colored
...
...
classification/lr_scheduler.py
View file @
41b18fd8
...
...
@@ -6,8 +6,8 @@
import
torch
from
timm.scheduler.cosine_lr
import
CosineLRScheduler
from
timm.scheduler.step_lr
import
StepLRScheduler
from
timm.scheduler.scheduler
import
Scheduler
from
timm.scheduler.step_lr
import
StepLRScheduler
def
build_scheduler
(
config
,
optimizer
,
n_iter_per_epoch
):
...
...
@@ -67,7 +67,7 @@ class LinearLRScheduler(Scheduler):
initialize
=
True
,
)
->
None
:
super
().
__init__
(
optimizer
,
param_group_field
=
"
lr
"
,
param_group_field
=
'
lr
'
,
noise_range_t
=
noise_range_t
,
noise_pct
=
noise_pct
,
noise_std
=
noise_std
,
...
...
classification/main.py
View file @
41b18fd8
...
...
@@ -4,34 +4,32 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
os
import
time
import
random
import
argparse
import
datetime
import
numpy
as
np
import
os
import
random
import
subprocess
import
time
from
contextlib
import
suppress
import
numpy
as
np
import
torch
import
torch.backends.cudnn
as
cudnn
import
torch.distributed
as
dist
from
timm.utils
import
ModelEma
,
ApexScaler
from
timm.loss
import
LabelSmoothingCrossEntropy
,
SoftTargetCrossEntropy
from
timm.utils
import
accuracy
,
AverageMeter
from
config
import
get_config
from
models
import
build_model
from
dataset
import
build_loader
from
ddp_hooks
import
fp16_compress_hook
from
logger
import
create_logger
from
lr_scheduler
import
build_scheduler
from
models
import
build_model
from
optimizer
import
build_optimizer
from
logger
import
create_logger
from
timm.loss
import
LabelSmoothingCrossEntropy
,
SoftTargetCrossEntropy
from
timm.utils
import
ApexScaler
,
AverageMeter
,
ModelEma
,
accuracy
from
utils
import
MyAverageMeter
from
utils
import
NativeScalerWithGradNormCount
as
NativeScaler
from
utils
import
(
load_checkpoint
,
load_pretrained
,
save_checkpoint
,
get_grad_norm
,
auto_resume_helper
,
reduce_tensor
,
load_ema_checkpoint
,
MyAverageMeter
)
from
contextlib
import
suppress
from
ddp_hooks
import
fp16_compress_hook
from
utils
import
(
auto_resume_helper
,
get_grad_norm
,
load_checkpoint
,
load_ema_checkpoint
,
load_pretrained
,
reduce_tensor
,
save_checkpoint
)
try
:
from
apex
import
amp
...
...
@@ -60,10 +58,10 @@ def parse_option():
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
"
FILE
"
,
metavar
=
'
FILE
'
,
help
=
'path to config file'
)
parser
.
add_argument
(
"
--opts
"
,
'
--opts
'
,
help
=
"Modify config options by adding 'KEY VALUE' pairs. "
,
default
=
None
,
nargs
=
'+'
)
...
...
@@ -71,7 +69,7 @@ def parse_option():
# easy config modification
parser
.
add_argument
(
'--batch-size'
,
type
=
int
,
help
=
"
batch size for single GPU
"
)
help
=
'
batch size for single GPU
'
)
parser
.
add_argument
(
'--dataset'
,
type
=
str
,
help
=
'dataset name'
,
...
...
@@ -98,11 +96,11 @@ def parse_option():
parser
.
add_argument
(
'--accumulation-steps'
,
type
=
int
,
default
=
1
,
help
=
"
gradient accumulation steps
"
)
help
=
'
gradient accumulation steps
'
)
parser
.
add_argument
(
'--use-checkpoint'
,
action
=
'store_true'
,
help
=
"
whether to use gradient checkpointing to save memory
"
)
help
=
'
whether to use gradient checkpointing to save memory
'
)
parser
.
add_argument
(
'--amp-opt-level'
,
type
=
str
,
...
...
@@ -128,10 +126,10 @@ def parse_option():
parser
.
add_argument
(
'--use-zero'
,
action
=
'store_true'
,
help
=
"
whether to use ZeroRedundancyOptimizer (ZeRO) to save memory
"
)
help
=
'
whether to use ZeroRedundancyOptimizer (ZeRO) to save memory
'
)
# distributed training
parser
.
add_argument
(
"
--local-rank
"
,
parser
.
add_argument
(
'
--local-rank
'
,
type
=
int
,
required
=
True
,
help
=
'local rank for DistributedDataParallel'
)
...
...
@@ -152,14 +150,14 @@ def throughput(data_loader, model, logger):
for
i
in
range
(
50
):
model
(
images
)
torch
.
cuda
.
synchronize
()
logger
.
info
(
f
"
throughput averaged with 30 times
"
)
logger
.
info
(
f
'
throughput averaged with 30 times
'
)
tic1
=
time
.
time
()
for
i
in
range
(
30
):
model
(
images
)
torch
.
cuda
.
synchronize
()
tic2
=
time
.
time
()
logger
.
info
(
f
"
batch_size
{
batch_size
}
throughput
{
30
*
batch_size
/
(
tic2
-
tic1
)
}
"
f
'
batch_size
{
batch_size
}
throughput
{
30
*
batch_size
/
(
tic2
-
tic1
)
}
'
)
return
...
...
@@ -170,7 +168,7 @@ def main(config):
data_loader_val
,
data_loader_test
,
mixup_fn
=
build_loader
(
config
)
# build runner
logger
.
info
(
f
"
Creating model:
{
config
.
MODEL
.
TYPE
}
/
{
config
.
MODEL
.
NAME
}
"
)
logger
.
info
(
f
'
Creating model:
{
config
.
MODEL
.
TYPE
}
/
{
config
.
MODEL
.
NAME
}
'
)
model
=
build_model
(
config
)
model
.
cuda
()
logger
.
info
(
str
(
model
))
...
...
@@ -178,7 +176,7 @@ def main(config):
# build optimizer
optimizer
=
build_optimizer
(
config
,
model
)
if
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
if
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
config
.
defrost
()
if
has_native_amp
:
config
.
native_amp
=
True
...
...
@@ -189,14 +187,14 @@ def main(config):
else
:
use_amp
=
None
logger
.
warning
(
"
Neither APEX or native Torch AMP is available, using float32.
"
"
Install NVIDA apex or upgrade to PyTorch 1.6
"
)
'
Neither APEX or native Torch AMP is available, using float32.
'
'
Install NVIDA apex or upgrade to PyTorch 1.6
'
)
config
.
freeze
()
# setup automatic mixed-precision (AMP) loss scaling and op casting
amp_autocast
=
suppress
# do nothing
loss_scaler
=
None
if
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
if
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
if
use_amp
==
'apex'
:
model
,
optimizer
=
amp
.
initialize
(
model
,
optimizer
,
...
...
@@ -223,16 +221,16 @@ def main(config):
model
.
register_comm_hook
(
state
=
None
,
hook
=
fp16_compress_hook
)
logger
.
info
(
'using fp16_compress_hook!'
)
except
:
logger
.
info
(
"
cannot register fp16_compress_hook!
"
)
logger
.
info
(
'
cannot register fp16_compress_hook!
'
)
model_without_ddp
=
model
.
module
n_parameters
=
sum
(
p
.
numel
()
for
p
in
model
.
parameters
()
if
p
.
requires_grad
)
logger
.
info
(
f
"
number of params:
{
n_parameters
}
"
)
logger
.
info
(
f
'
number of params:
{
n_parameters
}
'
)
if
hasattr
(
model_without_ddp
,
'flops'
):
flops
=
model_without_ddp
.
flops
()
logger
.
info
(
f
"
number of GFLOPs:
{
flops
/
1e9
}
"
)
logger
.
info
(
f
'
number of GFLOPs:
{
flops
/
1e9
}
'
)
# build learning rate scheduler
lr_scheduler
=
build_scheduler
(
config
,
optimizer
,
len
(
data_loader_train
))
\
...
...
@@ -256,7 +254,7 @@ def main(config):
if
resume_file
:
if
config
.
MODEL
.
RESUME
:
logger
.
warning
(
f
"
auto-resume changing resume file from
{
config
.
MODEL
.
RESUME
}
to
{
resume_file
}
"
f
'
auto-resume changing resume file from
{
config
.
MODEL
.
RESUME
}
to
{
resume_file
}
'
)
config
.
defrost
()
config
.
MODEL
.
RESUME
=
resume_file
...
...
@@ -274,14 +272,14 @@ def main(config):
if
data_loader_val
is
not
None
:
acc1
,
acc5
,
loss
=
validate
(
config
,
data_loader_val
,
model
)
logger
.
info
(
f
"
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
"
f
'
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
'
)
elif
config
.
MODEL
.
PRETRAINED
:
load_pretrained
(
config
,
model_without_ddp
,
logger
)
if
data_loader_val
is
not
None
:
acc1
,
acc5
,
loss
=
validate
(
config
,
data_loader_val
,
model
)
logger
.
info
(
f
"
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
"
f
'
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
'
)
# evaluate EMA
...
...
@@ -289,12 +287,12 @@ def main(config):
if
config
.
TRAIN
.
EMA
.
ENABLE
:
# Important to create EMA model after cuda(), DP wrapper, and AMP but before SyncBN and DDP wrapper
model_ema
=
ModelEma
(
model
,
decay
=
config
.
TRAIN
.
EMA
.
DECAY
)
print
(
"
Using EMA with decay = %.8f
"
%
config
.
TRAIN
.
EMA
.
DECAY
)
print
(
'
Using EMA with decay = %.8f
'
%
config
.
TRAIN
.
EMA
.
DECAY
)
if
config
.
MODEL
.
RESUME
:
load_ema_checkpoint
(
config
,
model_ema
,
logger
)
acc1
,
acc5
,
loss
=
validate
(
config
,
data_loader_val
,
model_ema
.
ema
)
logger
.
info
(
f
"
Accuracy of the ema network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
"
f
'
Accuracy of the ema network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
'
)
if
config
.
THROUGHPUT_MODE
:
...
...
@@ -304,7 +302,7 @@ def main(config):
return
# train
logger
.
info
(
"
Start training
"
)
logger
.
info
(
'
Start training
'
)
start_time
=
time
.
time
()
for
epoch
in
range
(
config
.
TRAIN
.
START_EPOCH
,
config
.
TRAIN
.
EPOCHS
):
data_loader_train
.
sampler
.
set_epoch
(
epoch
)
...
...
@@ -337,7 +335,7 @@ def main(config):
if
data_loader_val
is
not
None
and
epoch
%
config
.
EVAL_FREQ
==
0
:
acc1
,
acc5
,
loss
=
validate
(
config
,
data_loader_val
,
model
,
epoch
)
logger
.
info
(
f
"
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
"
f
'
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
'
)
if
dist
.
get_rank
()
==
0
and
acc1
>
max_accuracy
:
save_checkpoint
(
config
,
...
...
@@ -357,7 +355,7 @@ def main(config):
acc1
,
acc5
,
loss
=
validate
(
config
,
data_loader_val
,
model_ema
.
ema
,
epoch
)
logger
.
info
(
f
"
Accuracy of the ema network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
"
f
'
Accuracy of the ema network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
'
)
if
dist
.
get_rank
()
==
0
and
acc1
>
max_ema_accuracy
:
save_checkpoint
(
config
,
...
...
@@ -411,7 +409,7 @@ def train_one_epoch(config,
samples
,
targets
=
mixup_fn
(
samples
,
targets
)
if
not
obsolete_torch_version
(
TORCH_VERSION
,
(
1
,
9
))
and
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
(
1
,
9
))
and
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
with
amp_autocast
(
dtype
=
amp_type
):
outputs
=
model
(
samples
)
else
:
...
...
@@ -420,7 +418,7 @@ def train_one_epoch(config,
if
config
.
TRAIN
.
ACCUMULATION_STEPS
>
1
:
if
not
obsolete_torch_version
(
TORCH_VERSION
,
(
1
,
9
))
and
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
TORCH_VERSION
,
(
1
,
9
))
and
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
with
amp_autocast
(
dtype
=
amp_type
):
loss
=
criterion
(
outputs
,
targets
)
loss
=
loss
/
config
.
TRAIN
.
ACCUMULATION_STEPS
...
...
@@ -428,7 +426,7 @@ def train_one_epoch(config,
with
amp_autocast
():
loss
=
criterion
(
outputs
,
targets
)
loss
=
loss
/
config
.
TRAIN
.
ACCUMULATION_STEPS
if
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
if
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
is_second_order
=
hasattr
(
optimizer
,
'is_second_order'
)
and
\
optimizer
.
is_second_order
grad_norm
=
loss_scaler
(
loss
,
...
...
@@ -458,14 +456,14 @@ def train_one_epoch(config,
lr_scheduler
.
step_update
(
epoch
*
num_steps
+
idx
)
else
:
if
not
obsolete_torch_version
(
TORCH_VERSION
,
(
1
,
9
))
and
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
TORCH_VERSION
,
(
1
,
9
))
and
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
with
amp_autocast
(
dtype
=
amp_type
):
loss
=
criterion
(
outputs
,
targets
)
else
:
with
amp_autocast
():
loss
=
criterion
(
outputs
,
targets
)
optimizer
.
zero_grad
()
if
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
if
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
is_second_order
=
hasattr
(
optimizer
,
'is_second_order'
)
and
\
optimizer
.
is_second_order
grad_norm
=
loss_scaler
(
loss
,
...
...
@@ -513,7 +511,7 @@ def train_one_epoch(config,
f
'mem
{
memory_used
:.
0
f
}
MB'
)
epoch_time
=
time
.
time
()
-
start
logger
.
info
(
f
"
EPOCH
{
epoch
}
training takes
{
datetime
.
timedelta
(
seconds
=
int
(
epoch_time
))
}
"
f
'
EPOCH
{
epoch
}
training takes
{
datetime
.
timedelta
(
seconds
=
int
(
epoch_time
))
}
'
)
...
...
@@ -578,35 +576,35 @@ def validate(config, data_loader, model, epoch=None):
if
__name__
==
'__main__'
:
_
,
config
=
parse_option
()
if
config
.
AMP_OPT_LEVEL
!=
"
O0
"
:
assert
has_native_amp
,
"
Please update pytorch(1.6+) to support amp!
"
if
config
.
AMP_OPT_LEVEL
!=
'
O0
'
:
assert
has_native_amp
,
'
Please update pytorch(1.6+) to support amp!
'
# init distributed env
if
'SLURM_PROCID'
in
os
.
environ
and
int
(
os
.
environ
[
'SLURM_TASKS_PER_NODE'
])
!=
1
:
print
(
"
\n
Dist init: SLURM
"
)
print
(
'
\n
Dist init: SLURM
'
)
rank
=
int
(
os
.
environ
[
'SLURM_PROCID'
])
gpu
=
rank
%
torch
.
cuda
.
device_count
()
config
.
defrost
()
config
.
LOCAL_RANK
=
gpu
config
.
freeze
()
world_size
=
int
(
os
.
environ
[
"
SLURM_NTASKS
"
])
if
"
MASTER_PORT
"
not
in
os
.
environ
:
os
.
environ
[
"
MASTER_PORT
"
]
=
"
29501
"
node_list
=
os
.
environ
[
"
SLURM_NODELIST
"
]
world_size
=
int
(
os
.
environ
[
'
SLURM_NTASKS
'
])
if
'
MASTER_PORT
'
not
in
os
.
environ
:
os
.
environ
[
'
MASTER_PORT
'
]
=
'
29501
'
node_list
=
os
.
environ
[
'
SLURM_NODELIST
'
]
addr
=
subprocess
.
getoutput
(
f
"
scontrol show hostname
{
node_list
}
| head -n1
"
)
if
"
MASTER_ADDR
"
not
in
os
.
environ
:
os
.
environ
[
"
MASTER_ADDR
"
]
=
addr
f
'
scontrol show hostname
{
node_list
}
| head -n1
'
)
if
'
MASTER_ADDR
'
not
in
os
.
environ
:
os
.
environ
[
'
MASTER_ADDR
'
]
=
addr
os
.
environ
[
'RANK'
]
=
str
(
rank
)
os
.
environ
[
'LOCAL_RANK'
]
=
str
(
gpu
)
os
.
environ
[
'LOCAL_SIZE'
]
=
str
(
torch
.
cuda
.
device_count
())
os
.
environ
[
'WORLD_SIZE'
]
=
str
(
world_size
)
if
'RANK'
in
os
.
environ
and
'WORLD_SIZE'
in
os
.
environ
:
rank
=
int
(
os
.
environ
[
"
RANK
"
])
rank
=
int
(
os
.
environ
[
'
RANK
'
])
world_size
=
int
(
os
.
environ
[
'WORLD_SIZE'
])
print
(
f
"
RANK and WORLD_SIZE in environ:
{
rank
}
/
{
world_size
}
"
)
print
(
f
'
RANK and WORLD_SIZE in environ:
{
rank
}
/
{
world_size
}
'
)
else
:
rank
=
-
1
world_size
=
-
1
...
...
@@ -647,13 +645,13 @@ if __name__ == '__main__':
os
.
makedirs
(
config
.
OUTPUT
,
exist_ok
=
True
)
logger
=
create_logger
(
output_dir
=
config
.
OUTPUT
,
dist_rank
=
dist
.
get_rank
(),
name
=
f
"
{
config
.
MODEL
.
NAME
}
"
)
name
=
f
'
{
config
.
MODEL
.
NAME
}
'
)
if
dist
.
get_rank
()
==
0
:
path
=
os
.
path
.
join
(
config
.
OUTPUT
,
"
config.json
"
)
with
open
(
path
,
"w"
)
as
f
:
path
=
os
.
path
.
join
(
config
.
OUTPUT
,
'
config.json
'
)
with
open
(
path
,
'w'
)
as
f
:
f
.
write
(
config
.
dump
())
logger
.
info
(
f
"
Full config saved to
{
path
}
"
)
logger
.
info
(
f
'
Full config saved to
{
path
}
'
)
# print config
logger
.
info
(
config
.
dump
())
...
...
classification/main_accelerate.py
View file @
41b18fd8
...
...
@@ -4,31 +4,29 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
datetime
import
argparse
import
os
import
time
import
datetime
import
logging
import
os
import
random
import
time
import
warnings
import
numpy
as
np
import
torch
import
torch.backends.cudnn
as
cudnn
import
numpy
as
np
from
accelerate
import
Accelerator
from
accelerate
import
GradScalerKwargs
from
accelerate
import
Accelerator
,
GradScalerKwargs
from
accelerate.logging
import
get_logger
from
timm.loss
import
LabelSmoothingCrossEntropy
,
SoftTargetCrossEntropy
from
timm.utils
import
AverageMeter
,
accuracy
,
ModelEma
from
tqdm
import
tqdm
import
warnings
from
config
import
get_config
from
models
import
build_model
from
dataset
import
build_loader2
from
ddp_hooks
import
fp16_compress_hook
from
lr_scheduler
import
build_scheduler
from
models
import
build_model
from
optimizer
import
build_optimizer
from
utils
import
load_pretrained
,
load_ema_checkpoint
from
ddp_hooks
import
fp16_compress_hook
from
timm.loss
import
LabelSmoothingCrossEntropy
,
SoftTargetCrossEntropy
from
timm.utils
import
AverageMeter
,
ModelEma
,
accuracy
from
tqdm
import
tqdm
from
utils
import
load_ema_checkpoint
,
load_pretrained
logger
=
get_logger
(
__name__
)
warnings
.
filterwarnings
(
'ignore'
)
...
...
@@ -37,11 +35,11 @@ warnings.filterwarnings('ignore')
def
parse_option
():
parser
=
argparse
.
ArgumentParser
(
'InternImage training and evaluation script'
,
add_help
=
False
)
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
"
FILE
"
,
help
=
'path to config file'
)
parser
.
add_argument
(
"
--opts
"
,
help
=
"Modify config options by adding 'KEY VALUE' pairs. "
,
default
=
None
,
nargs
=
'+'
)
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
'
FILE
'
,
help
=
'path to config file'
)
parser
.
add_argument
(
'
--opts
'
,
help
=
"Modify config options by adding 'KEY VALUE' pairs. "
,
default
=
None
,
nargs
=
'+'
)
# easy config modification
parser
.
add_argument
(
'--batch-size'
,
type
=
int
,
help
=
"
batch size for single GPU
"
)
parser
.
add_argument
(
'--batch-size'
,
type
=
int
,
help
=
'
batch size for single GPU
'
)
parser
.
add_argument
(
'--dataset'
,
type
=
str
,
help
=
'dataset name'
,
default
=
None
)
parser
.
add_argument
(
'--data-path'
,
type
=
str
,
help
=
'path to dataset'
)
parser
.
add_argument
(
'--zip'
,
action
=
'store_true'
,
help
=
'use zipped dataset instead of folder dataset'
)
...
...
@@ -58,16 +56,16 @@ def parse_option():
parser
.
add_argument
(
'--eval'
,
action
=
'store_true'
,
help
=
'Perform evaluation only'
)
parser
.
add_argument
(
'--throughput'
,
action
=
'store_true'
,
help
=
'Test throughput only'
)
parser
.
add_argument
(
'--save-ckpt-num'
,
default
=
1
,
type
=
int
)
parser
.
add_argument
(
'--accumulation-steps'
,
type
=
int
,
default
=
1
,
help
=
"
gradient accumulation steps
"
)
parser
.
add_argument
(
'--accumulation-steps'
,
type
=
int
,
default
=
1
,
help
=
'
gradient accumulation steps
'
)
parser
.
add_argument
(
'--disable-grad-scalar'
,
action
=
'store_true'
,
help
=
'disable Grad Scalar'
)
parser
.
add_argument
(
"
--logger
"
,
'
--logger
'
,
type
=
str
,
default
=
"
tensorboard
"
,
choices
=
[
"
tensorboard
"
,
"
wandb
"
],
default
=
'
tensorboard
'
,
choices
=
[
'
tensorboard
'
,
'
wandb
'
],
help
=
(
"
Whether to use [tensorboard](https://www.tensorflow.org/tensorboard) or [wandb](https://www.wandb.ai)
"
"
for experiment tracking and logging of model metrics and model checkpoints
"
'
Whether to use [tensorboard](https://www.tensorflow.org/tensorboard) or [wandb](https://www.wandb.ai)
'
'
for experiment tracking and logging of model metrics and model checkpoints
'
),
)
...
...
@@ -91,10 +89,10 @@ def seed_everything(seed, rank):
def
save_config
(
config
):
path
=
os
.
path
.
join
(
config
.
OUTPUT
,
"
config.json
"
)
with
open
(
path
,
"w"
)
as
f
:
path
=
os
.
path
.
join
(
config
.
OUTPUT
,
'
config.json
'
)
with
open
(
path
,
'w'
)
as
f
:
f
.
write
(
config
.
dump
())
logger
.
info
(
f
"
Full config saved to
{
path
}
"
)
logger
.
info
(
f
'
Full config saved to
{
path
}
'
)
def
build_criterion
(
config
):
...
...
@@ -140,7 +138,7 @@ def setup_autoresume(config):
if
resume_file
:
if
config
.
MODEL
.
RESUME
:
logger
.
warning
(
f
"
auto-resume changing resume file from
{
config
.
MODEL
.
RESUME
}
to
{
resume_file
}
"
)
logger
.
warning
(
f
'
auto-resume changing resume file from
{
config
.
MODEL
.
RESUME
}
to
{
resume_file
}
'
)
config
.
defrost
()
config
.
MODEL
.
RESUME
=
resume_file
config
.
freeze
()
...
...
@@ -200,10 +198,10 @@ def load_checkpoint_if_needed(accelerator, config, lr_scheduler=None):
def
log_model_statistic
(
model_wo_ddp
):
n_parameters
=
sum
(
p
.
numel
()
for
p
in
model_wo_ddp
.
parameters
()
if
p
.
requires_grad
)
logger
.
info
(
f
"
number of params:
{
n_parameters
}
"
)
logger
.
info
(
f
'
number of params:
{
n_parameters
}
'
)
if
hasattr
(
model_wo_ddp
,
'flops'
):
flops
=
model_wo_ddp
.
flops
()
logger
.
info
(
f
"
number of GFLOPs:
{
flops
/
1e9
}
"
)
logger
.
info
(
f
'
number of GFLOPs:
{
flops
/
1e9
}
'
)
def
train_epoch
(
*
,
model
,
optimizer
,
data_loader
,
scheduler
,
criterion
,
mixup_fn
,
...
...
@@ -316,15 +314,15 @@ def train(config, accelerator: Accelerator):
model
.
register_comm_hook
(
state
=
None
,
hook
=
fp16_compress_hook
)
logger
.
info
(
'using fp16_compress_hook!'
)
except
:
logger
.
info
(
"
cannot register fp16_compress_hook!
"
)
logger
.
info
(
'
cannot register fp16_compress_hook!
'
)
max_acc
=
load_checkpoint_if_needed
(
accelerator
,
config
,
lr_scheduler
)
logger
.
info
(
f
"
Created model:
{
config
.
MODEL
.
TYPE
}
/
{
config
.
MODEL
.
NAME
}
"
)
logger
.
info
(
f
'
Created model:
{
config
.
MODEL
.
TYPE
}
/
{
config
.
MODEL
.
NAME
}
'
)
logger
.
info
(
str
(
model
))
logger
.
info
(
"
Effective Optimizer Steps: {}
"
.
format
(
effective_update_steps_per_epoch
))
logger
.
info
(
"
Start training
"
)
logger
.
info
(
"
Max accuracy: {}
"
.
format
(
max_acc
))
logger
.
info
(
'
Effective Optimizer Steps: {}
'
.
format
(
effective_update_steps_per_epoch
))
logger
.
info
(
'
Start training
'
)
logger
.
info
(
'
Max accuracy: {}
'
.
format
(
max_acc
))
log_model_statistic
(
accelerator
.
unwrap_model
(
model
))
for
epoch
in
range
(
config
.
TRAIN
.
START_EPOCH
,
config
.
TRAIN
.
EPOCHS
):
...
...
@@ -346,8 +344,8 @@ def main():
args
,
config
=
parse_option
()
os
.
makedirs
(
config
.
OUTPUT
,
exist_ok
=
True
)
logging
.
basicConfig
(
format
=
"
%(asctime)s - %(levelname)s - %(name)s - %(message)s
"
,
datefmt
=
"
%m/%d/%Y %H:%M:%S
"
,
format
=
'
%(asctime)s - %(levelname)s - %(name)s - %(message)s
'
,
datefmt
=
'
%m/%d/%Y %H:%M:%S
'
,
filename
=
os
.
path
.
join
(
config
.
OUTPUT
,
'run.log'
),
level
=
logging
.
INFO
,
)
...
...
classification/main_deepspeed.py
View file @
41b18fd8
...
...
@@ -4,40 +4,39 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
os
import
time
import
random
import
argparse
import
datetime
import
numpy
as
np
import
os
import
random
import
subprocess
import
time
import
deepspeed
import
numpy
as
np
import
torch
import
torch.backends.cudnn
as
cudnn
import
torch.distributed
as
dist
import
deepspeed
from
timm.loss
import
LabelSmoothingCrossEntropy
,
SoftTargetCrossEntropy
from
timm.utils
import
accuracy
,
AverageMeter
from
config
import
get_config
from
models
import
build_model
from
dataset
import
build_loader
from
lr_scheduler
import
build_scheduler
from
optimizer
import
set_weight_decay_and_lr
from
logger
import
create_logger
from
utils
import
load_pretrained
,
reduce_tensor
,
MyAverageMeter
from
ddp_hooks
import
fp16_compress_hook
from
ema_deepspeed
import
EMADeepspeed
from
logger
import
create_logger
from
lr_scheduler
import
build_scheduler
from
models
import
build_model
from
optimizer
import
set_weight_decay_and_lr
from
timm.loss
import
LabelSmoothingCrossEntropy
,
SoftTargetCrossEntropy
from
timm.utils
import
AverageMeter
,
accuracy
from
utils
import
MyAverageMeter
,
load_pretrained
,
reduce_tensor
def
parse_option
():
parser
=
argparse
.
ArgumentParser
(
'InternImage training and evaluation script'
,
add_help
=
False
)
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
"
FILE
"
,
help
=
'path to config file'
)
parser
.
add_argument
(
"
--opts
"
,
help
=
"Modify config options by adding 'KEY VALUE' pairs. "
,
default
=
None
,
nargs
=
'+'
)
parser
.
add_argument
(
'--cfg'
,
type
=
str
,
required
=
True
,
metavar
=
'
FILE
'
,
help
=
'path to config file'
)
parser
.
add_argument
(
'
--opts
'
,
help
=
"Modify config options by adding 'KEY VALUE' pairs. "
,
default
=
None
,
nargs
=
'+'
)
# easy config modification
parser
.
add_argument
(
'--batch-size'
,
type
=
int
,
help
=
"
batch size for single GPU
"
)
parser
.
add_argument
(
'--batch-size'
,
type
=
int
,
help
=
'
batch size for single GPU
'
)
parser
.
add_argument
(
'--dataset'
,
type
=
str
,
help
=
'dataset name'
,
default
=
None
)
parser
.
add_argument
(
'--data-path'
,
type
=
str
,
help
=
'path to dataset'
)
parser
.
add_argument
(
'--zip'
,
action
=
'store_true'
,
help
=
'use zipped dataset instead of folder dataset'
)
...
...
@@ -56,10 +55,10 @@ def parse_option():
parser
.
add_argument
(
'--eval'
,
action
=
'store_true'
,
help
=
'Perform evaluation only'
)
parser
.
add_argument
(
'--throughput'
,
action
=
'store_true'
,
help
=
'Test throughput only'
)
parser
.
add_argument
(
'--save-ckpt-num'
,
default
=
1
,
type
=
int
)
parser
.
add_argument
(
'--accumulation-steps'
,
type
=
int
,
default
=
1
,
help
=
"
gradient accumulation steps
"
)
parser
.
add_argument
(
'--accumulation-steps'
,
type
=
int
,
default
=
1
,
help
=
'
gradient accumulation steps
'
)
# distributed training
parser
.
add_argument
(
"
--local-rank
"
,
type
=
int
,
required
=
True
,
help
=
'local rank for DistributedDataParallel'
)
parser
.
add_argument
(
'
--local-rank
'
,
type
=
int
,
required
=
True
,
help
=
'local rank for DistributedDataParallel'
)
# deepspeed config
parser
.
add_argument
(
'--disable-grad-scalar'
,
action
=
'store_true'
,
help
=
'disable Grad Scalar'
)
...
...
@@ -69,7 +68,7 @@ def parse_option():
help
=
'enable model offloading'
)
# To use Zero3, Please use main_accelerate.py instead.
# For this script, we are facing a similar issue as https://github.com/microsoft/DeepSpeed/issues/3068
parser
.
add_argument
(
"
--zero-stage
"
,
type
=
int
,
default
=
1
,
choices
=
[
1
,
2
],
help
=
'deep speed zero stage'
)
parser
.
add_argument
(
'
--zero-stage
'
,
type
=
int
,
default
=
1
,
choices
=
[
1
,
2
],
help
=
'deep speed zero stage'
)
args
,
unparsed
=
parser
.
parse_known_args
()
config
=
get_config
(
args
)
...
...
@@ -87,10 +86,10 @@ def seed_everything(seed, rank):
def
save_config
(
config
):
path
=
os
.
path
.
join
(
config
.
OUTPUT
,
"
config.json
"
)
with
open
(
path
,
"w"
)
as
f
:
path
=
os
.
path
.
join
(
config
.
OUTPUT
,
'
config.json
'
)
with
open
(
path
,
'w'
)
as
f
:
f
.
write
(
config
.
dump
())
logger
.
info
(
f
"
Full config saved to
{
path
}
"
)
logger
.
info
(
f
'
Full config saved to
{
path
}
'
)
def
build_criterion
(
config
):
...
...
@@ -132,10 +131,10 @@ def scale_learning_rate(config, num_processes):
def
log_model_statistic
(
model_wo_ddp
):
n_parameters
=
sum
(
p
.
numel
()
for
p
in
model_wo_ddp
.
parameters
()
if
p
.
requires_grad
)
logger
.
info
(
f
"
number of params:
{
n_parameters
/
1e6
}
M
"
)
logger
.
info
(
f
'
number of params:
{
n_parameters
/
1e6
}
M
'
)
if
hasattr
(
model_wo_ddp
,
'flops'
):
flops
=
model_wo_ddp
.
flops
()
logger
.
info
(
f
"
number of GFLOPs:
{
flops
/
1e9
}
"
)
logger
.
info
(
f
'
number of GFLOPs:
{
flops
/
1e9
}
'
)
def
get_parameter_groups
(
model
,
config
):
...
...
@@ -171,37 +170,37 @@ def build_ds_config(config, args):
opt_lower
=
config
.
TRAIN
.
OPTIMIZER
.
NAME
.
lower
()
if
opt_lower
==
'adamw'
:
optimizer
=
{
"
type
"
:
"
AdamW
"
,
"
params
"
:
{
"
lr
"
:
config
.
TRAIN
.
BASE_LR
,
"
eps
"
:
config
.
TRAIN
.
OPTIMIZER
.
EPS
,
"
betas
"
:
config
.
TRAIN
.
OPTIMIZER
.
BETAS
,
"
weight_decay
"
:
config
.
TRAIN
.
WEIGHT_DECAY
'
type
'
:
'
AdamW
'
,
'
params
'
:
{
'
lr
'
:
config
.
TRAIN
.
BASE_LR
,
'
eps
'
:
config
.
TRAIN
.
OPTIMIZER
.
EPS
,
'
betas
'
:
config
.
TRAIN
.
OPTIMIZER
.
BETAS
,
'
weight_decay
'
:
config
.
TRAIN
.
WEIGHT_DECAY
}
}
else
:
return
NotImplemented
ds_config
=
{
"
train_micro_batch_size_per_gpu
"
:
config
.
DATA
.
BATCH_SIZE
,
"
optimizer
"
:
optimizer
,
"
fp16
"
:
{
"
enabled
"
:
True
,
"
auto_cast
"
:
True
,
"
loss_scale
"
:
1
if
args
.
disable_grad_scalar
else
0
'
train_micro_batch_size_per_gpu
'
:
config
.
DATA
.
BATCH_SIZE
,
'
optimizer
'
:
optimizer
,
'
fp16
'
:
{
'
enabled
'
:
True
,
'
auto_cast
'
:
True
,
'
loss_scale
'
:
1
if
args
.
disable_grad_scalar
else
0
},
"
zero_optimization
"
:
{
"
stage
"
:
args
.
zero_stage
,
"
offload_optimizer
"
:
{
"
device
"
:
args
.
offload_optimizer
'
zero_optimization
'
:
{
'
stage
'
:
args
.
zero_stage
,
'
offload_optimizer
'
:
{
'
device
'
:
args
.
offload_optimizer
},
"
offload_param
"
:
{
"
device
"
:
args
.
offload_param
'
offload_param
'
:
{
'
device
'
:
args
.
offload_param
}
},
"
steps_per_print
"
:
1e10
,
"
gradient_accumulation_steps
"
:
config
.
TRAIN
.
ACCUMULATION_STEPS
,
"
gradient_clipping
"
:
config
.
TRAIN
.
CLIP_GRAD
,
'
steps_per_print
'
:
1e10
,
'
gradient_accumulation_steps
'
:
config
.
TRAIN
.
ACCUMULATION_STEPS
,
'
gradient_clipping
'
:
config
.
TRAIN
.
CLIP_GRAD
,
}
return
ds_config
...
...
@@ -216,14 +215,14 @@ def throughput(data_loader, model, logger):
for
i
in
range
(
50
):
model
(
images
)
torch
.
cuda
.
synchronize
()
logger
.
info
(
f
"
throughput averaged with 30 times
"
)
logger
.
info
(
f
'
throughput averaged with 30 times
'
)
tic1
=
time
.
time
()
for
i
in
range
(
30
):
model
(
images
)
torch
.
cuda
.
synchronize
()
tic2
=
time
.
time
()
logger
.
info
(
f
"
batch_size
{
batch_size
}
throughput
{
30
*
batch_size
/
(
tic2
-
tic1
)
}
"
f
'
batch_size
{
batch_size
}
throughput
{
30
*
batch_size
/
(
tic2
-
tic1
)
}
'
)
return
...
...
@@ -281,7 +280,7 @@ def train_epoch(config, model, criterion, data_loader, optimizer, epoch, mixup_f
f
'mem
{
memory_used
:.
0
f
}
MB'
)
epoch_time
=
time
.
time
()
-
start
logger
.
info
(
f
"
EPOCH
{
epoch
}
training takes
{
datetime
.
timedelta
(
seconds
=
int
(
epoch_time
))
}
"
)
logger
.
info
(
f
'
EPOCH
{
epoch
}
training takes
{
datetime
.
timedelta
(
seconds
=
int
(
epoch_time
))
}
'
)
@
torch
.
no_grad
()
...
...
@@ -361,7 +360,7 @@ def train(config, ds_config):
model
.
register_comm_hook
(
state
=
None
,
hook
=
fp16_compress_hook
)
logger
.
info
(
'using fp16_compress_hook!'
)
except
:
logger
.
info
(
"
cannot register fp16_compress_hook!
"
)
logger
.
info
(
'
cannot register fp16_compress_hook!
'
)
model_without_ddp
=
model
.
module
...
...
@@ -399,10 +398,10 @@ def train(config, ds_config):
# -------------- training ---------------- #
logger
.
info
(
f
"
Creating model:
{
config
.
MODEL
.
TYPE
}
/
{
config
.
MODEL
.
NAME
}
"
)
logger
.
info
(
f
'
Creating model:
{
config
.
MODEL
.
TYPE
}
/
{
config
.
MODEL
.
NAME
}
'
)
logger
.
info
(
str
(
model
))
logger
.
info
(
get_optimizer_state_str
(
optimizer
))
logger
.
info
(
"
Start training
"
)
logger
.
info
(
'
Start training
'
)
logger
.
info
(
'max_accuracy: {}'
.
format
(
max_accuracy
))
log_model_statistic
(
model_without_ddp
)
...
...
@@ -429,7 +428,7 @@ def train(config, ds_config):
if
epoch
%
config
.
EVAL_FREQ
==
0
:
acc1
,
_
,
_
=
eval_epoch
(
config
,
data_loader_val
,
model
,
epoch
)
logger
.
info
(
f
"
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
"
)
logger
.
info
(
f
'
Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1
:.
1
f
}
%
'
)
if
acc1
>
max_accuracy
:
model
.
save_checkpoint
(
...
...
@@ -451,7 +450,7 @@ def train(config, ds_config):
if
model_ema
is
not
None
:
with
model_ema
.
activate
(
model
):
acc1_ema
,
_
,
_
=
eval_epoch
(
config
,
data_loader_val
,
model
,
epoch
)
logger
.
info
(
f
"
[EMA] Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1_ema
:.
1
f
}
%
"
)
logger
.
info
(
f
'
[EMA] Accuracy of the network on the
{
len
(
dataset_val
)
}
test images:
{
acc1_ema
:.
1
f
}
%
'
)
max_accuracy_ema
=
max
(
max_accuracy_ema
,
acc1_ema
)
logger
.
info
(
f
'[EMA] Max accuracy:
{
max_accuracy_ema
:.
2
f
}
%'
)
...
...
@@ -475,7 +474,8 @@ def eval(config):
logger
.
info
(
msg
)
except
:
try
:
from
deepspeed.utils.zero_to_fp32
import
get_fp32_state_dict_from_zero_checkpoint
from
deepspeed.utils.zero_to_fp32
import
\
get_fp32_state_dict_from_zero_checkpoint
ckpt_dir
=
os
.
path
.
dirname
(
config
.
MODEL
.
RESUME
)
tag
=
os
.
path
.
basename
(
config
.
MODEL
.
RESUME
)
state_dict
=
get_fp32_state_dict_from_zero_checkpoint
(
checkpoint_dir
=
ckpt_dir
,
tag
=
tag
)
...
...
@@ -498,30 +498,30 @@ if __name__ == '__main__':
# init distributed env
if
'SLURM_PROCID'
in
os
.
environ
and
int
(
os
.
environ
[
'SLURM_TASKS_PER_NODE'
])
!=
1
:
print
(
"
\n
Dist init: SLURM
"
)
print
(
'
\n
Dist init: SLURM
'
)
rank
=
int
(
os
.
environ
[
'SLURM_PROCID'
])
gpu
=
rank
%
torch
.
cuda
.
device_count
()
config
.
defrost
()
config
.
LOCAL_RANK
=
gpu
config
.
freeze
()
world_size
=
int
(
os
.
environ
[
"
SLURM_NTASKS
"
])
if
"
MASTER_PORT
"
not
in
os
.
environ
:
os
.
environ
[
"
MASTER_PORT
"
]
=
"
29501
"
node_list
=
os
.
environ
[
"
SLURM_NODELIST
"
]
world_size
=
int
(
os
.
environ
[
'
SLURM_NTASKS
'
])
if
'
MASTER_PORT
'
not
in
os
.
environ
:
os
.
environ
[
'
MASTER_PORT
'
]
=
'
29501
'
node_list
=
os
.
environ
[
'
SLURM_NODELIST
'
]
addr
=
subprocess
.
getoutput
(
f
"
scontrol show hostname
{
node_list
}
| head -n1
"
)
if
"
MASTER_ADDR
"
not
in
os
.
environ
:
os
.
environ
[
"
MASTER_ADDR
"
]
=
addr
f
'
scontrol show hostname
{
node_list
}
| head -n1
'
)
if
'
MASTER_ADDR
'
not
in
os
.
environ
:
os
.
environ
[
'
MASTER_ADDR
'
]
=
addr
os
.
environ
[
'RANK'
]
=
str
(
rank
)
os
.
environ
[
'LOCAL_RANK'
]
=
str
(
gpu
)
os
.
environ
[
'LOCAL_SIZE'
]
=
str
(
torch
.
cuda
.
device_count
())
os
.
environ
[
'WORLD_SIZE'
]
=
str
(
world_size
)
if
'RANK'
in
os
.
environ
and
'WORLD_SIZE'
in
os
.
environ
:
rank
=
int
(
os
.
environ
[
"
RANK
"
])
rank
=
int
(
os
.
environ
[
'
RANK
'
])
world_size
=
int
(
os
.
environ
[
'WORLD_SIZE'
])
print
(
f
"
RANK and WORLD_SIZE in environ:
{
rank
}
/
{
world_size
}
"
)
print
(
f
'
RANK and WORLD_SIZE in environ:
{
rank
}
/
{
world_size
}
'
)
else
:
rank
=
-
1
world_size
=
-
1
...
...
@@ -535,7 +535,7 @@ if __name__ == '__main__':
os
.
makedirs
(
config
.
OUTPUT
,
exist_ok
=
True
)
logger
=
create_logger
(
output_dir
=
config
.
OUTPUT
,
dist_rank
=
dist
.
get_rank
(),
name
=
f
"
{
config
.
MODEL
.
NAME
}
"
)
name
=
f
'
{
config
.
MODEL
.
NAME
}
'
)
logger
.
info
(
config
.
dump
())
if
dist
.
get_rank
()
==
0
:
save_config
(
config
)
...
...
classification/meta_data/map22kto1k.txt
View file @
41b18fd8
...
...
@@ -997,4 +997,4 @@
21194
21198
21367
21815
\ No newline at end of file
21815
classification/models/__init__.py
View file @
41b18fd8
...
...
@@ -4,4 +4,4 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
.build
import
build_model
\ No newline at end of file
from
.build
import
build_model
classification/models/build.py
View file @
41b18fd8
...
...
@@ -31,6 +31,6 @@ def build_model(config):
remove_center
=
config
.
MODEL
.
INTERN_IMAGE
.
REMOVE_CENTER
,
)
else
:
raise
NotImplementedError
(
f
"
Unkown model:
{
model_type
}
"
)
raise
NotImplementedError
(
f
'
Unkown model:
{
model_type
}
'
)
return
model
classification/models/intern_image.py
View file @
41b18fd8
...
...
@@ -6,10 +6,10 @@
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torch.utils.checkpoint
as
checkpoint
from
timm.models.layers
import
trunc_normal_
,
DropPath
from
ops_dcnv3
import
modules
as
opsm
import
torch.nn.functional
as
F
from
timm.models.layers
import
DropPath
,
trunc_normal_
class
to_channels_first
(
nn
.
Module
):
...
...
@@ -80,7 +80,7 @@ class CrossAttention(nn.Module):
attn_head_dim (int, optional): Dimension of attention head.
out_dim (int, optional): Dimension of output.
"""
def
__init__
(
self
,
dim
,
num_heads
=
8
,
...
...
@@ -172,7 +172,7 @@ class AttentiveBlock(nn.Module):
attn_head_dim (int, optional): Dimension of attention head. Default: None.
out_dim (int, optional): Dimension of output. Default: None.
"""
def
__init__
(
self
,
dim
,
num_heads
,
...
...
@@ -181,7 +181,7 @@ class AttentiveBlock(nn.Module):
drop
=
0.
,
attn_drop
=
0.
,
drop_path
=
0.
,
norm_layer
=
"
LN
"
,
norm_layer
=
'
LN
'
,
attn_head_dim
=
None
,
out_dim
=
None
):
super
().
__init__
()
...
...
@@ -593,10 +593,10 @@ class InternImage(nn.Module):
print
(
f
'using activation layer:
{
act_layer
}
'
)
print
(
f
'using main norm layer:
{
norm_layer
}
'
)
print
(
f
'using dpr:
{
drop_path_type
}
,
{
drop_path_rate
}
'
)
print
(
f
"
level2_post_norm:
{
level2_post_norm
}
"
)
print
(
f
"
level2_post_norm_block_ids:
{
level2_post_norm_block_ids
}
"
)
print
(
f
"
res_post_norm:
{
res_post_norm
}
"
)
print
(
f
"
remove_center:
{
remove_center
}
"
)
print
(
f
'
level2_post_norm:
{
level2_post_norm
}
'
)
print
(
f
'
level2_post_norm_block_ids:
{
level2_post_norm_block_ids
}
'
)
print
(
f
'
res_post_norm:
{
res_post_norm
}
'
)
print
(
f
'
remove_center:
{
remove_center
}
'
)
in_chans
=
3
self
.
patch_embed
=
StemLayer
(
in_chans
=
in_chans
,
...
...
@@ -638,7 +638,7 @@ class InternImage(nn.Module):
remove_center
=
remove_center
,
# for InternImage-H/G
)
self
.
levels
.
append
(
level
)
if
not
use_clip_projector
:
# for InternImage-T/S/B/L/XL
self
.
conv_head
=
nn
.
Sequential
(
nn
.
Conv2d
(
self
.
num_features
,
...
...
@@ -671,7 +671,7 @@ class InternImage(nn.Module):
self
.
fc_norm
=
build_norm_layer
(
clip_embed_dim
,
norm_layer
,
eps
=
1e-6
)
self
.
head
=
nn
.
Linear
(
clip_embed_dim
,
num_classes
)
if
num_classes
>
0
else
nn
.
Identity
()
self
.
avgpool
=
nn
.
AdaptiveAvgPool2d
((
1
,
1
))
self
.
num_layers
=
len
(
depths
)
self
.
apply
(
self
.
_init_weights
)
...
...
@@ -705,16 +705,16 @@ class InternImage(nn.Module):
lr_ratios
[
tag
]
=
decay
idx
+=
1
# patch_embed (before stage-1)
lr_ratios
[
"
patch_embed
"
]
=
lr_ratios
[
'levels.0.blocks.0.'
]
lr_ratios
[
'
patch_embed
'
]
=
lr_ratios
[
'levels.0.blocks.0.'
]
# levels.0.downsample (between stage-1 and stage-2)
lr_ratios
[
"
levels.0.downsample
"
]
=
lr_ratios
[
'levels.1.blocks.0.'
]
lr_ratios
[
"
levels.0.norm
"
]
=
lr_ratios
[
'levels.1.blocks.0.'
]
lr_ratios
[
'
levels.0.downsample
'
]
=
lr_ratios
[
'levels.1.blocks.0.'
]
lr_ratios
[
'
levels.0.norm
'
]
=
lr_ratios
[
'levels.1.blocks.0.'
]
# levels.1.downsample (between stage-2 and stage-3)
lr_ratios
[
"
levels.1.downsample
"
]
=
lr_ratios
[
'levels.2.blocks.0.'
]
lr_ratios
[
"
levels.1.norm
"
]
=
lr_ratios
[
'levels.2.blocks.0.'
]
lr_ratios
[
'
levels.1.downsample
'
]
=
lr_ratios
[
'levels.2.blocks.0.'
]
lr_ratios
[
'
levels.1.norm
'
]
=
lr_ratios
[
'levels.2.blocks.0.'
]
# levels.2.downsample (between stage-3 and stage-4)
lr_ratios
[
"
levels.2.downsample
"
]
=
lr_ratios
[
'levels.3.blocks.0.'
]
lr_ratios
[
"
levels.2.norm
"
]
=
lr_ratios
[
'levels.3.blocks.0.'
]
lr_ratios
[
'
levels.2.downsample
'
]
=
lr_ratios
[
'levels.3.blocks.0.'
]
lr_ratios
[
'
levels.2.norm
'
]
=
lr_ratios
[
'levels.3.blocks.0.'
]
return
lr_ratios
def
forward_features
(
self
,
x
):
...
...
@@ -738,11 +738,11 @@ class InternImage(nn.Module):
x
,
x_
=
level
(
x
,
return_wo_downsample
=
True
)
seq_out
.
append
(
x_
)
return
seq_out
def
forward_clip_projector
(
self
,
x
):
# for InternImage-H/G
xs
=
self
.
forward_features_seq_out
(
x
)
x1
,
x2
,
x3
,
x4
=
xs
x1
=
x1
.
permute
(
0
,
3
,
1
,
2
)
# NHWC -> NCHW
x2
=
x2
.
permute
(
0
,
3
,
1
,
2
)
# NHWC -> NCHW
x3
=
x3
.
permute
(
0
,
3
,
1
,
2
)
# NHWC -> NCHW
...
...
@@ -756,9 +756,9 @@ class InternImage(nn.Module):
x
=
x
.
flatten
(
-
2
).
transpose
(
1
,
2
).
contiguous
()
x
=
self
.
clip_projector
(
x
)
x
=
self
.
fc_norm
(
x
)
return
x
def
forward
(
self
,
x
):
if
self
.
use_clip_projector
:
# for InternImage-H/G
x
=
self
.
forward_clip_projector
(
x
)
...
...
classification/ops_dcnv3/functions/dcnv3_func.py
View file @
41b18fd8
...
...
@@ -4,18 +4,16 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
from
__future__
import
absolute_import
,
division
,
print_function
import
DCNv3
import
pkg_resources
import
torch
import
torch.nn.functional
as
F
from
torch.autograd
import
Function
from
torch.autograd.function
import
once_differentiable
from
torch.cuda.amp
import
custom_bwd
,
custom_fwd
import
DCNv3
import
pkg_resources
dcn_version
=
float
(
pkg_resources
.
get_distribution
(
'DCNv3'
).
version
)
...
...
@@ -169,6 +167,7 @@ def remove_center_sampling_locations(sampling_locations, kernel_w, kernel_h):
sampling_locations
=
sampling_locations
[:,:,:,
idx
,
:]
return
sampling_locations
def
dcnv3_core_pytorch
(
input
,
offset
,
mask
,
kernel_h
,
kernel_w
,
stride_h
,
stride_w
,
pad_h
,
...
...
classification/ops_dcnv3/modules/__init__.py
View file @
41b18fd8
...
...
@@ -4,4 +4,4 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
.dcnv3
import
DCNv3
,
DCNv3_pytorch
\ No newline at end of file
from
.dcnv3
import
DCNv3
,
DCNv3_pytorch
classification/ops_dcnv3/modules/dcnv3.py
View file @
41b18fd8
...
...
@@ -4,15 +4,15 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from
__future__
import
absolute_import
from
__future__
import
print_function
from
__future__
import
division
from
__future__
import
absolute_import
,
division
,
print_function
import
warnings
import
torch
from
torch
import
nn
import
torch.nn.functional
as
F
from
torch.nn.init
import
xavier_uniform_
,
constant_
from
torch
import
nn
from
torch.nn.init
import
constant_
,
xavier_uniform_
from
..functions
import
DCNv3Function
,
dcnv3_core_pytorch
...
...
@@ -72,7 +72,7 @@ def build_act_layer(act_layer):
def
_is_power_of_2
(
n
):
if
(
not
isinstance
(
n
,
int
))
or
(
n
<
0
):
raise
ValueError
(
"
invalid input for _is_power_of_2: {} (type: {})
"
.
format
(
n
,
type
(
n
)))
'
invalid input for _is_power_of_2: {} (type: {})
'
.
format
(
n
,
type
(
n
)))
return
(
n
&
(
n
-
1
)
==
0
)
and
n
!=
0
...
...
@@ -126,7 +126,7 @@ class DCNv3_pytorch(nn.Module):
if
not
_is_power_of_2
(
_d_per_group
):
warnings
.
warn
(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"
which is more efficient in our CUDA implementation.
"
)
'
which is more efficient in our CUDA implementation.
'
)
self
.
offset_scale
=
offset_scale
self
.
channels
=
channels
...
...
@@ -164,7 +164,7 @@ class DCNv3_pytorch(nn.Module):
self
.
input_proj
=
nn
.
Linear
(
channels
,
channels
)
self
.
output_proj
=
nn
.
Linear
(
channels
,
channels
)
self
.
_reset_parameters
()
if
center_feature_scale
:
self
.
center_feature_scale_proj_weight
=
nn
.
Parameter
(
torch
.
zeros
((
group
,
channels
),
dtype
=
torch
.
float
))
...
...
@@ -256,7 +256,7 @@ class DCNv3(nn.Module):
if
not
_is_power_of_2
(
_d_per_group
):
warnings
.
warn
(
"You'd better set channels in DCNv3 to make the dimension of each attention head a power of 2 "
"
which is more efficient in our CUDA implementation.
"
)
'
which is more efficient in our CUDA implementation.
'
)
self
.
offset_scale
=
offset_scale
self
.
channels
=
channels
...
...
@@ -297,7 +297,7 @@ class DCNv3(nn.Module):
self
.
input_proj
=
nn
.
Linear
(
channels
,
channels
)
self
.
output_proj
=
nn
.
Linear
(
channels
,
channels
)
self
.
_reset_parameters
()
if
center_feature_scale
:
self
.
center_feature_scale_proj_weight
=
nn
.
Parameter
(
torch
.
zeros
((
group
,
channels
),
dtype
=
torch
.
float
))
...
...
@@ -332,7 +332,7 @@ class DCNv3(nn.Module):
mask
=
self
.
mask
(
x1
).
reshape
(
N
,
H
,
W
,
self
.
group
,
-
1
)
mask
=
F
.
softmax
(
mask
,
-
1
)
mask
=
mask
.
reshape
(
N
,
H
,
W
,
-
1
).
type
(
dtype
)
x
=
DCNv3Function
.
apply
(
x
,
offset
,
mask
,
self
.
kernel_size
,
self
.
kernel_size
,
...
...
@@ -343,7 +343,7 @@ class DCNv3(nn.Module):
self
.
offset_scale
,
256
,
self
.
remove_center
)
if
self
.
center_feature_scale
:
center_feature_scale
=
self
.
center_feature_scale_module
(
x1
,
self
.
center_feature_scale_proj_weight
,
self
.
center_feature_scale_proj_bias
)
...
...
classification/ops_dcnv3/setup.py
View file @
41b18fd8
...
...
@@ -4,39 +4,34 @@
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import
os
import
glob
import
os
import
torch
from
setuptools
import
find_packages
,
setup
from
torch.utils.cpp_extension
import
CUDA_HOME
,
CppExtension
,
CUDAExtension
from
torch.utils.cpp_extension
import
CUDA_HOME
from
torch.utils.cpp_extension
import
CppExtension
from
torch.utils.cpp_extension
import
CUDAExtension
from
setuptools
import
find_packages
from
setuptools
import
setup
requirements
=
[
"torch"
,
"torchvision"
]
requirements
=
[
'torch'
,
'torchvision'
]
def
get_extensions
():
this_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
extensions_dir
=
os
.
path
.
join
(
this_dir
,
"
src
"
)
extensions_dir
=
os
.
path
.
join
(
this_dir
,
'
src
'
)
main_file
=
glob
.
glob
(
os
.
path
.
join
(
extensions_dir
,
"
*.cpp
"
))
source_cpu
=
glob
.
glob
(
os
.
path
.
join
(
extensions_dir
,
"
cpu
"
,
"
*.cpp
"
))
source_cuda
=
glob
.
glob
(
os
.
path
.
join
(
extensions_dir
,
"
cuda
"
,
"
*.cu
"
))
main_file
=
glob
.
glob
(
os
.
path
.
join
(
extensions_dir
,
'
*.cpp
'
))
source_cpu
=
glob
.
glob
(
os
.
path
.
join
(
extensions_dir
,
'
cpu
'
,
'
*.cpp
'
))
source_cuda
=
glob
.
glob
(
os
.
path
.
join
(
extensions_dir
,
'
cuda
'
,
'
*.cu
'
))
sources
=
main_file
+
source_cpu
extension
=
CppExtension
extra_compile_args
=
{
"
cxx
"
:
[]}
extra_compile_args
=
{
'
cxx
'
:
[]}
define_macros
=
[]
if
torch
.
cuda
.
is_available
()
and
CUDA_HOME
is
not
None
:
extension
=
CUDAExtension
sources
+=
source_cuda
define_macros
+=
[(
"
WITH_CUDA
"
,
None
)]
extra_compile_args
[
"
nvcc
"
]
=
[
define_macros
+=
[(
'
WITH_CUDA
'
,
None
)]
extra_compile_args
[
'
nvcc
'
]
=
[
# "-DCUDA_HAS_FP16=1",
# "-D__CUDA_NO_HALF_OPERATORS__",
# "-D__CUDA_NO_HALF_CONVERSIONS__",
...
...
@@ -49,7 +44,7 @@ def get_extensions():
include_dirs
=
[
extensions_dir
]
ext_modules
=
[
extension
(
"
DCNv3
"
,
'
DCNv3
'
,
sources
,
include_dirs
=
include_dirs
,
define_macros
=
define_macros
,
...
...
@@ -60,16 +55,16 @@ def get_extensions():
setup
(
name
=
"
DCNv3
"
,
version
=
"
1.1
"
,
author
=
"
InternImage
"
,
url
=
"
https://github.com/OpenGVLab/InternImage
"
,
name
=
'
DCNv3
'
,
version
=
'
1.1
'
,
author
=
'
InternImage
'
,
url
=
'
https://github.com/OpenGVLab/InternImage
'
,
description
=
"
PyTorch Wrapper for CUDA Functions of DCNv3
"
,
'
PyTorch Wrapper for CUDA Functions of DCNv3
'
,
packages
=
find_packages
(
exclude
=
(
"
configs
"
,
"
tests
"
,
'
configs
'
,
'
tests
'
,
)),
ext_modules
=
get_extensions
(),
cmdclass
=
{
"
build_ext
"
:
torch
.
utils
.
cpp_extension
.
BuildExtension
},
cmdclass
=
{
'
build_ext
'
:
torch
.
utils
.
cpp_extension
.
BuildExtension
},
)
classification/ops_dcnv3/src/cuda/dcnv3_cuda.cu
View file @
41b18fd8
...
...
@@ -171,4 +171,4 @@ dcnv3_cuda_backward(const at::Tensor &input, const at::Tensor &offset,
}
else
{
return
{
grad_input
,
grad_offset
,
grad_mask
};
}
}
\ No newline at end of file
}
classification/ops_dcnv3/src/cuda/dcnv3_im2col_cuda.cuh
View file @
41b18fd8
...
...
@@ -1091,4 +1091,4 @@ void dcnv3_col2im_cuda(
if
(
err
!=
cudaSuccess
)
{
printf
(
"error in dcnv3_col2im_cuda: %s
\n
"
,
cudaGetErrorString
(
err
));
}
}
\ No newline at end of file
}
Prev
1
…
11
12
13
14
15
16
17
18
19
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment