OpenDAS / ColossalAI / Commits / 350ccc04

Unverified commit 350ccc04, authored Nov 08, 2022 by Jiarui Fang, committed by GitHub on Nov 08, 2022

[example] opt does not depend on Titans (#1811)

Parent: 6fa71d65
Showing 4 changed files with 34 additions and 11 deletions (+34 −11):

examples/language/opt/context.py         +32   −0
examples/language/opt/log                 +0  −10
examples/language/opt/requirements.txt    +1   −0
examples/language/opt/run_clm.py          +1   −1
examples/language/opt/context.py  (new file, 0 → 100644)  @ 350ccc04
import torch.distributed as dist

from colossalai.context import ParallelMode
from colossalai.core import global_context as gpc


class barrier_context():
    """
    This context manager is used to allow one process to execute while blocking all
    other processes in the same process group. This is often useful when downloading is required
    as we only want to download in one process to prevent file corruption.

    Args:
        executor_rank (int): the process rank to execute without blocking, all other processes will be blocked
        parallel_mode (ParallelMode): the parallel mode corresponding to a process group

    Usage:
        with barrier_context():
            dataset = CIFAR10(root='./data', download=True)
    """

    def __init__(self, executor_rank: int = 0, parallel_mode: ParallelMode = ParallelMode.GLOBAL):
        # the class name is lowercase by convention
        current_rank = gpc.get_local_rank(parallel_mode=parallel_mode)
        self.should_block = current_rank != executor_rank
        self.group = gpc.get_group(parallel_mode=parallel_mode)

    def __enter__(self):
        if self.should_block:
            dist.barrier(group=self.group)

    def __exit__(self, exc_type, exc_value, exc_traceback):
        if not self.should_block:
            dist.barrier(group=self.group)
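For reference, a minimal sketch of how this context manager is meant to be used in a distributed run. The load_dataset call, the dataset name, and the launch_from_torch initialization shown here are illustrative assumptions rather than part of this commit:

import colossalai
from datasets import load_dataset

from context import barrier_context

# Assumes the script is started with torchrun so the rank/world-size env vars exist.
colossalai.launch_from_torch(config={})

# executor_rank=0 (the default): rank 0 runs the body immediately while the other
# ranks block on dist.barrier() in __enter__; when rank 0 leaves the block it joins
# the barrier in __exit__, releasing the waiting ranks, which then execute the body
# against the already-populated download cache.
with barrier_context(executor_rank=0):
    raw_datasets = load_dataset("wikitext", "wikitext-2-raw-v1")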
examples/language/opt/log  (deleted, 100644 → 0)  @ 6fa71d65

This diff is collapsed; the previously committed log file is removed.
examples/language/opt/requirements.txt  @ 350ccc04

@@ -3,3 +3,4 @@ torch >= 1.8.1
 datasets >= 1.8.0
 sentencepiece != 0.1.92
 protobuf
+accelerate == 0.13.2
examples/language/opt/run_clm.py  @ 350ccc04

@@ -32,9 +32,9 @@ import datasets
 import torch
 import torch.distributed as dist
 from accelerate.utils import set_seed
+from context import barrier_context
 from datasets import load_dataset
 from packaging import version
-from titans.utils import barrier_context
 from torch.utils.data import DataLoader
 from tqdm.auto import tqdm
 from utils import colo_memory_cap
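To show the effect of the import swap in context, here is a hedged sketch of a guarded download as run_clm.py presumably performs it; the exact call site is outside this hunk, and the tokenizer/model name below is an assumption for illustration:

# Illustrative only: barrier_context is now the example-local implementation from
# context.py, so the Titans package is no longer required to run the OPT example.
from context import barrier_context      # previously: from titans.utils import barrier_context
from transformers import AutoTokenizer

with barrier_context():
    # The executor rank downloads the tokenizer files once; the remaining ranks wait
    # on the barrier and afterwards load from the shared Hugging Face cache.
    tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")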