Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
7faef933
"tests/test_data/vscode:/vscode.git/clone" did not exist on "e3daf724458ee96ec49cfd1a040ce28adce2cb53"
Unverified
Commit
7faef933
authored
May 31, 2022
by
ver217
Committed by
GitHub
May 31, 2022
Browse files
fix dist spec mgr (#1045)
parent
9492a561
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
1 deletion
+18
-1
colossalai/tensor/dist_spec_mgr.py
colossalai/tensor/dist_spec_mgr.py
+1
-1
tests/test_tensor/test_dist_spec_mgr.py
tests/test_tensor/test_dist_spec_mgr.py
+17
-0
No files found.
colossalai/tensor/dist_spec_mgr.py
View file @
7faef933
...
...
@@ -34,7 +34,7 @@ class DistSpecManager:
chunk_size
=
divide
(
tensor
.
size
(
dim
),
dist_spec
.
num_partitions
[
i
])
chunk
=
chunk
.
narrow
(
dim
,
idx
//
num_parts
*
chunk_size
,
chunk_size
)
idx
%=
num_parts
return
chunk
.
detach
().
contiguous
()
return
chunk
.
clone
().
detach
().
contiguous
()
@
staticmethod
def
_gather
(
tensor
:
torch
.
Tensor
,
old_dist_spec
:
_DistSpec
)
->
torch
.
Tensor
:
...
...
tests/test_tensor/test_dist_spec_mgr.py
View file @
7faef933
...
...
@@ -33,8 +33,25 @@ def run():
assert
torch
.
equal
(
x
,
DistSpecManager
.
_gather
(
mat_shard
,
mat_spec
))
def
check_mem
():
group
=
_get_default_group
()
size
=
dist
.
get_world_size
()
assert
torch
.
cuda
.
memory_allocated
()
==
0
x
=
torch
.
rand
(
32
,
32
).
cuda
()
orig_mem
=
x
.
numel
()
*
x
.
element_size
()
assert
torch
.
cuda
.
memory_allocated
()
==
orig_mem
old_dist_spec
=
distspec
.
replicate
()
row_spec
=
distspec
.
shard
(
group
,
[
0
],
[
size
])
x
.
data
=
DistSpecManager
.
_shard_as
(
x
,
old_dist_spec
,
row_spec
)
assert
x
.
size
(
0
)
==
32
//
size
and
x
.
size
(
1
)
==
32
assert
torch
.
cuda
.
memory_allocated
()
==
orig_mem
//
size
x
.
data
=
DistSpecManager
.
_gather
(
x
,
row_spec
)
assert
torch
.
cuda
.
memory_allocated
()
==
orig_mem
def run_dist(rank, world_size, port):
    """Per-process entry point for the distributed test launcher.

    Initializes the colossalai NCCL process group for this rank, then runs
    the memory-accounting check followed by the main spec-manager test.
    check_mem() must run first: it asserts a clean CUDA allocator state.
    """
    colossalai.launch(
        config={},
        rank=rank,
        world_size=world_size,
        host='localhost',
        port=port,
        backend='nccl',
    )
    check_mem()
    run()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment