OpenDAS / apex / Commits / 2af29c19

Commit 2af29c19, authored Aug 16, 2018 by Michael Carilli

Removing orphaned /distributed/run_distributed.sh

parent 1d45fada
Showing 3 changed files with 9 additions and 9 deletions:

  examples/distributed/run_distributed.sh    +0  -1
  examples/imagenet/main.py                  +1  -1
  examples/imagenet/main_fp16_optimizer.py   +8  -7
examples/distributed/run_distributed.sh  (deleted, file mode 100644 → 0)

    export CUDA_VISIBLE_DEVICES=0,1; python -m apex.parallel.multiproc main.py
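The deleted script was a one-liner: pin two GPUs and let apex.parallel.multiproc spawn one main.py worker per visible device. The imagenet examples below read args.local_rank and the WORLD_SIZE environment variable and initialize with init_method='env://', which is the contract of a per-node launcher. The sketch below illustrates that contract only; it is not apex's multiproc implementation, and the --local_rank flag name and the master address/port values are assumptions.

    # launch_per_gpu.py -- hedged sketch of a per-node launcher, not
    # apex.parallel.multiproc itself. Spawns one worker per visible GPU and
    # hands each worker the env vars / --local_rank flag that an
    # init_method='env://' worker script expects.
    import os
    import subprocess
    import sys

    import torch

    def main():
        ngpus = torch.cuda.device_count()
        procs = []
        for local_rank in range(ngpus):
            env = dict(os.environ)
            env["WORLD_SIZE"] = str(ngpus)        # single node: world size == #GPUs
            env["RANK"] = str(local_rank)         # global rank == local rank here
            env["MASTER_ADDR"] = "127.0.0.1"      # assumption: single-node job
            env["MASTER_PORT"] = "29500"          # assumption: a free local port
            cmd = [sys.executable, "main.py",
                   "--local_rank", str(local_rank)] + sys.argv[1:]
            procs.append(subprocess.Popen(cmd, env=env))
        for p in procs:
            p.wait()

    if __name__ == "__main__":
        main()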
examples/imagenet/main.py

...
@@ -98,7 +98,7 @@ def main():
    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
...
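Each worker derives its CUDA device from the local rank handed to it by the launcher; the modulo over torch.cuda.device_count() keeps the index in range for the node. Below is a minimal, self-contained sketch of that mapping (the --local_rank flag name is inferred from the args.local_rank attribute used above, and the snippet assumes at least one CUDA device).

    # Hedged sketch of the local_rank -> GPU mapping used in the hunk above.
    import argparse

    import torch

    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", default=0, type=int)  # injected by the launcher
    args = parser.parse_args()

    # One process per GPU: pin this process to the device matching its local rank.
    args.gpu = args.local_rank % torch.cuda.device_count()
    torch.cuda.set_device(args.gpu)  # later .cuda() calls default to this device
    print("worker local_rank={} -> cuda:{}".format(args.local_rank, args.gpu))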
examples/imagenet/main_fp16_optimizer.py

...
@@ -100,13 +100,14 @@ def main():
    args.distributed = int(os.environ['WORLD_SIZE']) > 1
    args.gpu = 0
    args.world_size = 1
    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
    if args.distributed:
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
        args.world_size = torch.distributed.get_world_size()
    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."
...
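The new args.gpu and args.world_size attributes default to 0 and 1 and are only overwritten in distributed runs, so the code further down can read args.world_size without an initialized process group (torch.distributed.get_world_size() cannot be called before init_process_group). A hedged sketch of that caching pattern, with Args standing in for the example's argparse namespace:

    # Hedged sketch: cache the world size once on args so later code works
    # in both single-process and distributed runs.
    import os

    import torch
    import torch.distributed as dist

    class Args:                 # hypothetical stand-in for the argparse namespace
        local_rank = 0

    args = Args()
    args.distributed = int(os.environ.get("WORLD_SIZE", "1")) > 1
    args.gpu = 0
    args.world_size = 1         # safe default for single-process runs
    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        dist.init_process_group(backend="nccl", init_method="env://")
        args.world_size = dist.get_world_size()   # queried once, reused below
    print("world size:", args.world_size)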
@@ -324,8 +325,8 @@ def train(train_loader, model, criterion, optimizer, epoch):
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   epoch, i, len(train_loader),
-                  torch.distributed.get_world_size()*args.batch_size/batch_time.val,
-                  torch.distributed.get_world_size()*args.batch_size/batch_time.avg,
+                  args.world_size*args.batch_size/batch_time.val,
+                  args.world_size*args.batch_size/batch_time.avg,
                   batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1, top5=top5))
...
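The speed terms in this log line are aggregate throughput: each of the world_size workers pushes batch_size images per iteration, so world_size * batch_size / batch_time is images per second across the whole job, and with args.world_size defaulting to 1 the same print also works in a single-process run. A small self-contained sketch follows (AverageMeter is a minimal stand-in for the helper the example defines; the world size and batch size are made-up values):

    # Hedged sketch: the throughput terms in the log line above.
    import time

    class AverageMeter(object):
        """Tracks the most recent value and the running average."""
        def __init__(self):
            self.val = 0.0
            self.sum = 0.0
            self.count = 0
            self.avg = 0.0
        def update(self, val, n=1):
            self.val = val
            self.sum += val * n
            self.count += n
            self.avg = self.sum / self.count

    world_size = 4      # assumption: 4 workers in the job
    batch_size = 256    # assumption: per-worker batch size

    batch_time = AverageMeter()
    end = time.time()
    for _ in range(3):
        time.sleep(0.05)                    # stand-in for forward/backward/step
        batch_time.update(time.time() - end)
        end = time.time()

    # Aggregate images/sec: every worker processes batch_size images per iteration.
    print('Speed {0:.1f} ({1:.1f}) img/s'.format(
        world_size * batch_size / batch_time.val,
        world_size * batch_size / batch_time.avg))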
@@ -382,8 +383,8 @@ def validate(val_loader, model, criterion):
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                   i, len(val_loader),
-                  torch.distributed.get_world_size()*args.batch_size/batch_time.val,
-                  torch.distributed.get_world_size()*args.batch_size/batch_time.avg,
+                  args.world_size*args.batch_size/batch_time.val,
+                  args.world_size*args.batch_size/batch_time.avg,
                   batch_time=batch_time, loss=losses,
                   top1=top1, top5=top5))
...
@@ -445,7 +446,7 @@ def accuracy(output, target, topk=(1,)):

def reduce_tensor(tensor):
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.reduce_op.SUM)
-   rt /= torch.distributed.get_world_size()
+   rt /= args.world_size
    return rt


if __name__ == '__main__':
...
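reduce_tensor averages a metric across workers: sum it with an all-reduce, then divide by the world size; the commit only swaps the live get_world_size() call for the cached args.world_size. A runnable sketch of the same pattern is below; it uses the gloo backend and a local TCP port so it also runs on a CPU-only machine, and spells the op enum dist.ReduceOp.SUM as newer PyTorch releases do (the port number and worker count are arbitrary choices).

    # Hedged sketch of the reduce_tensor pattern: sum across workers, then
    # divide by the world size to get the mean.
    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp

    def worker(rank, world_size):
        dist.init_process_group(
            backend="gloo",                         # CPU-friendly for the demo
            init_method="tcp://127.0.0.1:29501",    # assumption: a free local port
            rank=rank,
            world_size=world_size,
        )
        # Pretend each worker measured a different loss value this iteration.
        local_loss = torch.tensor([float(rank + 1)])
        reduced = local_loss.clone()
        dist.all_reduce(reduced, op=dist.ReduceOp.SUM)
        reduced /= world_size                       # same role as args.world_size
        if rank == 0:
            print("mean loss across workers:", reduced.item())  # (1+2)/2 = 1.5

    if __name__ == "__main__":
        mp.spawn(worker, args=(2,), nprocs=2)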