Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SOLOv2-pytorch
Commits
fd67644c
Commit
fd67644c
authored
Apr 10, 2019
by
Kai Chen
Browse files
use torch.distributed.barrier() instead of the self-implemented one
parent
64b1c8b6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
35 deletions
+4
-35
mmdet/core/evaluation/eval_hooks.py
mmdet/core/evaluation/eval_hooks.py
+4
-35
No files found.
mmdet/core/evaluation/eval_hooks.py
View file @
fd67644c
import
os
import
os.path
as
osp
import
shutil
import
time
import
mmcv
import
numpy
as
np
import
torch
import
torch.distributed
as
dist
from
mmcv.runner
import
Hook
,
obj_from_dict
from
mmcv.parallel
import
scatter
,
collate
from
pycocotools.cocoeval
import
COCOeval
...
...
@@ -29,36 +28,6 @@ class DistEvalHook(Hook):
'dataset must be a Dataset object or a dict, not {}'
.
format
(
type
(
dataset
)))
self
.
interval
=
interval
self
.
lock_dir
=
None
def
_barrier
(
self
,
rank
,
world_size
):
"""Due to some issues with `torch.distributed.barrier()`, we have to
implement this ugly barrier function.
"""
if
rank
==
0
:
for
i
in
range
(
1
,
world_size
):
tmp
=
osp
.
join
(
self
.
lock_dir
,
'{}.pkl'
.
format
(
i
))
while
not
(
osp
.
exists
(
tmp
)):
time
.
sleep
(
1
)
for
i
in
range
(
1
,
world_size
):
tmp
=
osp
.
join
(
self
.
lock_dir
,
'{}.pkl'
.
format
(
i
))
os
.
remove
(
tmp
)
else
:
tmp
=
osp
.
join
(
self
.
lock_dir
,
'{}.pkl'
.
format
(
rank
))
mmcv
.
dump
([],
tmp
)
while
osp
.
exists
(
tmp
):
time
.
sleep
(
1
)
def
before_run
(
self
,
runner
):
self
.
lock_dir
=
osp
.
join
(
runner
.
work_dir
,
'.lock_map_hook'
)
if
runner
.
rank
==
0
:
if
osp
.
exists
(
self
.
lock_dir
):
shutil
.
rmtree
(
self
.
lock_dir
)
mmcv
.
mkdir_or_exist
(
self
.
lock_dir
)
def
after_run
(
self
,
runner
):
if
runner
.
rank
==
0
:
shutil
.
rmtree
(
self
.
lock_dir
)
def
after_train_epoch
(
self
,
runner
):
if
not
self
.
every_n_epochs
(
runner
,
self
.
interval
):
...
...
@@ -84,7 +53,7 @@ class DistEvalHook(Hook):
if
runner
.
rank
==
0
:
print
(
'
\n
'
)
self
.
_
barrier
(
runner
.
rank
,
runner
.
world_size
)
dist
.
barrier
()
for
i
in
range
(
1
,
runner
.
world_size
):
tmp_file
=
osp
.
join
(
runner
.
work_dir
,
'temp_{}.pkl'
.
format
(
i
))
tmp_results
=
mmcv
.
load
(
tmp_file
)
...
...
@@ -96,8 +65,8 @@ class DistEvalHook(Hook):
tmp_file
=
osp
.
join
(
runner
.
work_dir
,
'temp_{}.pkl'
.
format
(
runner
.
rank
))
mmcv
.
dump
(
results
,
tmp_file
)
self
.
_
barrier
(
runner
.
rank
,
runner
.
world_size
)
self
.
_
barrier
(
runner
.
rank
,
runner
.
world_size
)
dist
.
barrier
()
dist
.
barrier
()
def
evaluate
(
self
):
raise
NotImplementedError
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment