OpenDAS / ColossalAI · Commits

Commit 7bc5a8e3, authored May 05, 2023 by zhuwenwen
Merge branch 'main' of https://github.com/hpcaitech/ColossalAI
Parents: e6748d82, 0f785cb1
Changes: 428 files in this merge. This page shows 20 changed files with 1570 additions and 0 deletions (+1570, -0).
applications/Chat/coati/replay_buffer/base.py                        +43   -0
applications/Chat/coati/replay_buffer/naive.py                       +57   -0
applications/Chat/coati/replay_buffer/utils.py                       +73   -0
applications/Chat/coati/trainer/__init__.py                          +6    -0
applications/Chat/coati/trainer/base.py                              +75   -0
applications/Chat/coati/trainer/callbacks/__init__.py                +5    -0
applications/Chat/coati/trainer/callbacks/base.py                    +39   -0
applications/Chat/coati/trainer/callbacks/performance_evaluator.py   +183  -0
applications/Chat/coati/trainer/callbacks/save_checkpoint.py         +75   -0
applications/Chat/coati/trainer/ppo.py                               +213  -0
applications/Chat/coati/trainer/rm.py                                +123  -0
applications/Chat/coati/trainer/sft.py                               +135  -0
applications/Chat/coati/trainer/strategies/__init__.py               +6    -0
applications/Chat/coati/trainer/strategies/base.py                   +132  -0
applications/Chat/coati/trainer/strategies/colossalai.py             +188  -0
applications/Chat/coati/trainer/strategies/ddp.py                    +93   -0
applications/Chat/coati/trainer/strategies/naive.py                  +70   -0
applications/Chat/coati/trainer/strategies/sampler.py                +32   -0
applications/Chat/coati/trainer/utils.py                             +19   -0
applications/Chat/coati/utils/__init__.py                            +3    -0
applications/Chat/coati/replay_buffer/base.py (new file, mode 100644)

from abc import ABC, abstractmethod
from typing import Any

from coati.experience_maker.base import Experience


class ReplayBuffer(ABC):
    """Replay buffer base class. It stores experience.

    Args:
        sample_batch_size (int): Batch size when sampling.
        limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
    """

    def __init__(self, sample_batch_size: int, limit: int = 0) -> None:
        super().__init__()
        self.sample_batch_size = sample_batch_size
        # limit <= 0 means unlimited
        self.limit = limit

    @abstractmethod
    def append(self, experience: Experience) -> None:
        pass

    @abstractmethod
    def clear(self) -> None:
        pass

    @abstractmethod
    def sample(self) -> Experience:
        pass

    @abstractmethod
    def __len__(self) -> int:
        pass

    @abstractmethod
    def __getitem__(self, idx: int) -> Any:
        pass

    @abstractmethod
    def collate_fn(self, batch: Any) -> Experience:
        pass
applications/Chat/coati/replay_buffer/naive.py (new file, mode 100644)

import random
from typing import List

import torch
from coati.experience_maker.base import Experience

from .base import ReplayBuffer
from .utils import BufferItem, make_experience_batch, split_experience_batch


class NaiveReplayBuffer(ReplayBuffer):
    """Naive replay buffer class. It stores experience.

    Args:
        sample_batch_size (int): Batch size when sampling.
        limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
        cpu_offload (bool, optional): Whether to offload experience to cpu when sampling. Defaults to True.
    """

    def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:
        super().__init__(sample_batch_size, limit)
        self.cpu_offload = cpu_offload
        self.target_device = torch.device(f'cuda:{torch.cuda.current_device()}')
        # TODO(ver217): add prefetch
        self.items: List[BufferItem] = []

    @torch.no_grad()
    def append(self, experience: Experience) -> None:
        if self.cpu_offload:
            experience.to_device(torch.device('cpu'))
        items = split_experience_batch(experience)
        self.items.extend(items)
        if self.limit > 0:
            samples_to_remove = len(self.items) - self.limit
            if samples_to_remove > 0:
                self.items = self.items[samples_to_remove:]

    def clear(self) -> None:
        self.items.clear()

    @torch.no_grad()
    def sample(self) -> Experience:
        items = random.sample(self.items, self.sample_batch_size)
        experience = make_experience_batch(items)
        if self.cpu_offload:
            experience.to_device(self.target_device)
        return experience

    def __len__(self) -> int:
        return len(self.items)

    def __getitem__(self, idx: int) -> BufferItem:
        return self.items[idx]

    def collate_fn(self, batch) -> Experience:
        experience = make_experience_batch(batch)
        return experience
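When limit > 0, append trims from the front of the item list, so the buffer behaves as a FIFO over samples. A minimal standalone sketch of that trimming, with plain tensors standing in for coati's BufferItem dataclass (names here are illustrative only):

import torch

items: list = []
limit = 4  # keep at most 4 samples, mirroring `limit > 0` in NaiveReplayBuffer.append

for step in range(6):
    items.append(torch.tensor([step]))
    samples_to_remove = len(items) - limit
    if samples_to_remove > 0:
        items = items[samples_to_remove:]  # drop the oldest samples first

print([int(t) for t in items])  # [2, 3, 4, 5] — only the newest `limit` samples survive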
applications/Chat/coati/replay_buffer/utils.py (new file, mode 100644)

from dataclasses import dataclass
from typing import List, Optional

import torch
import torch.nn.functional as F
from coati.experience_maker.base import Experience


@dataclass
class BufferItem:
    """BufferItem is an item of experience data.

    Shapes of each tensor:
        sequences: (S)
        action_log_probs: (A)
        values: (1)
        reward: (1)
        advantages: (1)
        attention_mask: (S)
        action_mask: (A)

    "A" is the number of actions.
    """
    sequences: torch.Tensor
    action_log_probs: torch.Tensor
    values: torch.Tensor
    reward: torch.Tensor
    advantages: torch.Tensor
    attention_mask: Optional[torch.LongTensor]
    action_mask: Optional[torch.BoolTensor]


def split_experience_batch(experience: Experience) -> List[BufferItem]:
    batch_size = experience.sequences.size(0)
    batch_kwargs = [{} for _ in range(batch_size)]
    keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask')
    for key in keys:
        value = getattr(experience, key)
        if isinstance(value, torch.Tensor):
            vals = torch.unbind(value)
        else:
            # None
            vals = [value for _ in range(batch_size)]
        assert batch_size == len(vals)
        for i, v in enumerate(vals):
            batch_kwargs[i][key] = v
    items = [BufferItem(**kwargs) for kwargs in batch_kwargs]
    return items


def zero_pad_sequences(sequences: List[torch.Tensor], side: str = 'left') -> torch.Tensor:
    assert side in ('left', 'right')
    max_len = max(seq.size(0) for seq in sequences)
    padded_sequences = []
    for seq in sequences:
        pad_len = max_len - seq.size(0)
        padding = (pad_len, 0) if side == 'left' else (0, pad_len)
        padded_sequences.append(F.pad(seq, padding))
    return torch.stack(padded_sequences, dim=0)


def make_experience_batch(items: List[BufferItem]) -> Experience:
    kwargs = {}
    to_pad_keys = set(('action_log_probs', 'action_mask'))
    keys = ('sequences', 'action_log_probs', 'values', 'reward', 'advantages', 'attention_mask', 'action_mask')
    for key in keys:
        vals = [getattr(item, key) for item in items]
        if key in to_pad_keys:
            batch_data = zero_pad_sequences(vals)
        else:
            batch_data = torch.stack(vals, dim=0)
        kwargs[key] = batch_data
    return Experience(**kwargs)
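zero_pad_sequences defaults to side='left', so variable-length action tensors end up right-aligned in the batch. A quick standalone check of that convention (toy tensors, torch only):

import torch
import torch.nn.functional as F

seqs = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
max_len = max(s.size(0) for s in seqs)
# side='left' pads at the front, keeping shorter sequences right-aligned
padded = torch.stack([F.pad(s, (max_len - s.size(0), 0)) for s in seqs], dim=0)
print(padded)  # tensor([[1, 2, 3], [0, 4, 5]])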
applications/Chat/coati/trainer/__init__.py (new file, mode 100644)

from .base import Trainer
from .ppo import PPOTrainer
from .rm import RewardModelTrainer
from .sft import SFTTrainer

__all__ = ['Trainer', 'PPOTrainer', 'RewardModelTrainer', 'SFTTrainer']
applications/Chat/coati/trainer/base.py (new file, mode 100644)

from abc import ABC, abstractmethod
from typing import Any, Callable, Dict, List, Optional, Union

import torch
from coati.experience_maker import Experience

from .callbacks import Callback
from .strategies import Strategy


class Trainer(ABC):
    """
    Base class for RLHF trainers.

    Args:
        strategy (Strategy): the strategy to use for training
        max_epochs (int, defaults to 1): the number of epochs of training process
        dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
        callbacks (List[Callback], defaults to []): the callbacks to call during training process
        generate_kwargs (dict, optional): the kwargs to use while model generating
    """

    def __init__(self,
                 strategy: Strategy,
                 max_epochs: int = 1,
                 dataloader_pin_memory: bool = True,
                 callbacks: List[Callback] = [],
                 **generate_kwargs) -> None:
        super().__init__()
        self.strategy = strategy
        self.max_epochs = max_epochs
        self.generate_kwargs = generate_kwargs
        self.dataloader_pin_memory = dataloader_pin_memory
        self.callbacks = callbacks

    # TODO(ver217): maybe simplify this code using a context manager
    def _on_fit_start(self) -> None:
        for callback in self.callbacks:
            callback.on_fit_start()

    def _on_fit_end(self) -> None:
        for callback in self.callbacks:
            callback.on_fit_end()

    def _on_episode_start(self, episode: int) -> None:
        for callback in self.callbacks:
            callback.on_episode_start(episode)

    def _on_episode_end(self, episode: int) -> None:
        for callback in self.callbacks:
            callback.on_episode_end(episode)

    def _on_make_experience_start(self) -> None:
        for callback in self.callbacks:
            callback.on_make_experience_start()

    def _on_make_experience_end(self, experience: Experience) -> None:
        for callback in self.callbacks:
            callback.on_make_experience_end(experience)

    def _on_learn_epoch_start(self, epoch: int) -> None:
        for callback in self.callbacks:
            callback.on_learn_epoch_start(epoch)

    def _on_learn_epoch_end(self, epoch: int) -> None:
        for callback in self.callbacks:
            callback.on_learn_epoch_end(epoch)

    def _on_learn_batch_start(self) -> None:
        for callback in self.callbacks:
            callback.on_learn_batch_start()

    def _on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
        for callback in self.callbacks:
            callback.on_learn_batch_end(metrics, experience)
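Every _on_* hook fans the event out to the registered callbacks in order. A toy standalone version of that dispatch pattern (LoggingCallback is hypothetical, not a coati class):

class LoggingCallback:
    def on_episode_start(self, episode: int) -> None:
        print(f'episode {episode} started')

callbacks = [LoggingCallback()]

def _on_episode_start(episode: int) -> None:
    # Each registered callback receives the event in registration order.
    for callback in callbacks:
        callback.on_episode_start(episode)

_on_episode_start(0)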
applications/Chat/coati/trainer/callbacks/__init__.py (new file, mode 100644)

from .base import Callback
from .performance_evaluator import PerformanceEvaluator
from .save_checkpoint import SaveCheckpoint

__all__ = ['Callback', 'PerformanceEvaluator', 'SaveCheckpoint']
applications/Chat/coati/trainer/callbacks/base.py (new file, mode 100644)

from abc import ABC

from coati.experience_maker import Experience


class Callback(ABC):
    """
    Base callback class. It defines the interface for callbacks.
    """

    def on_fit_start(self) -> None:
        pass

    def on_fit_end(self) -> None:
        pass

    def on_episode_start(self, episode: int) -> None:
        pass

    def on_episode_end(self, episode: int) -> None:
        pass

    def on_make_experience_start(self) -> None:
        pass

    def on_make_experience_end(self, experience: Experience) -> None:
        pass

    def on_learn_epoch_start(self, epoch: int) -> None:
        pass

    def on_learn_epoch_end(self, epoch: int) -> None:
        pass

    def on_learn_batch_start(self) -> None:
        pass

    def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
        pass
applications/Chat/coati/trainer/callbacks/performance_evaluator.py (new file, mode 100644)

from time import time
from typing import Optional

import torch
import torch.distributed as dist
from coati.experience_maker import Experience

from .base import Callback


def get_world_size() -> int:
    if dist.is_initialized():
        return dist.get_world_size()
    return 1


def print_rank_0(*args, **kwargs) -> None:
    if not dist.is_initialized() or dist.get_rank() == 0:
        print(*args, **kwargs)


def divide(x: float, y: float) -> float:
    if y == 0:
        return float('inf')
    elif y == float('inf'):
        return float('nan')
    return x / y


@torch.no_grad()
def all_reduce_mean(x: float, world_size: int) -> float:
    if world_size == 1:
        return x
    tensor = torch.tensor([x], device=torch.cuda.current_device())
    dist.all_reduce(tensor)
    tensor = tensor / world_size
    return tensor.item()


class Timer:

    def __init__(self) -> None:
        self.start_time: Optional[float] = None
        self.duration: float = 0.

    def start(self) -> None:
        self.start_time = time()

    def end(self) -> None:
        assert self.start_time is not None
        self.duration += time() - self.start_time
        self.start_time = None

    def reset(self) -> None:
        self.duration = 0.


class PerformanceEvaluator(Callback):
    """
    Callback to evaluate the performance of the model.

    Args:
        actor_num_params: The number of parameters of the actor model.
        critic_num_params: The number of parameters of the critic model.
        initial_model_num_params: The number of parameters of the initial model.
        reward_model_num_params: The number of parameters of the reward model.
        enable_grad_checkpoint: Whether to enable gradient checkpointing.
        ignore_episodes: The number of episodes to ignore when calculating the performance.
    """

    def __init__(self,
                 actor_num_params: int,
                 critic_num_params: int,
                 initial_model_num_params: int,
                 reward_model_num_params: int,
                 enable_grad_checkpoint: bool = False,
                 ignore_episodes: int = 0) -> None:
        super().__init__()
        self.world_size = get_world_size()
        self.actor_num_params = actor_num_params
        self.critic_num_params = critic_num_params
        self.initial_model_num_params = initial_model_num_params
        self.reward_model_num_params = reward_model_num_params
        self.enable_grad_checkpoint = enable_grad_checkpoint
        self.ignore_episodes = ignore_episodes
        self.disable: bool = False

        self.overall_timer = Timer()
        self.make_experience_timer = Timer()
        self.learn_timer = Timer()
        self.make_experience_num_samples: int = 0
        self.make_experience_flop: int = 0
        self.learn_num_samples: int = 0
        self.learn_flop: int = 0

    def on_episode_start(self, episode: int) -> None:
        self.disable = self.ignore_episodes > 0 and episode < self.ignore_episodes
        if self.disable:
            return
        self.overall_timer.start()

    def on_episode_end(self, episode: int) -> None:
        if self.disable:
            return
        self.overall_timer.end()

    def on_make_experience_start(self) -> None:
        if self.disable:
            return
        self.make_experience_timer.start()

    def on_make_experience_end(self, experience: Experience) -> None:
        if self.disable:
            return
        self.make_experience_timer.end()

        batch_size, seq_len = experience.sequences.shape

        self.make_experience_num_samples += batch_size

        # actor generate
        num_actions = experience.action_mask.size(1)
        input_len = seq_len - num_actions
        total_seq_len = (input_len + seq_len - 1) * num_actions / 2
        self.make_experience_flop += self.actor_num_params * batch_size * total_seq_len * 2
        # actor forward
        self.make_experience_flop += self.actor_num_params * batch_size * seq_len * 2
        # critic forward
        self.make_experience_flop += self.critic_num_params * batch_size * seq_len * 2
        # initial model forward
        self.make_experience_flop += self.initial_model_num_params * batch_size * seq_len * 2
        # reward model forward
        self.make_experience_flop += self.reward_model_num_params * batch_size * seq_len * 2

    def on_learn_batch_start(self) -> None:
        if self.disable:
            return
        self.learn_timer.start()

    def on_learn_batch_end(self, metrics: dict, experience: Experience) -> None:
        if self.disable:
            return
        self.learn_timer.end()

        batch_size, seq_len = experience.sequences.shape

        self.learn_num_samples += batch_size

        # actor forward-backward, 3 means forward(1) + backward(2)
        self.learn_flop += self.actor_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))
        # critic forward-backward
        self.learn_flop += self.critic_num_params * batch_size * seq_len * 2 * (3 + int(self.enable_grad_checkpoint))

    def on_fit_end(self) -> None:
        avg_make_experience_duration = all_reduce_mean(self.make_experience_timer.duration, self.world_size)
        avg_learn_duration = all_reduce_mean(self.learn_timer.duration, self.world_size)
        avg_overall_duration = all_reduce_mean(self.overall_timer.duration, self.world_size)

        avg_make_experience_throughput = self.make_experience_num_samples * \
            self.world_size / (avg_make_experience_duration + 1e-12)
        avg_make_experience_tflops = self.make_experience_flop / 1e12 / (avg_make_experience_duration + 1e-12)

        avg_learn_throughput = self.learn_num_samples * self.world_size / (avg_learn_duration + 1e-12)
        avg_learn_tflops = self.learn_flop / 1e12 / (avg_learn_duration + 1e-12)

        num_effective_samples = min(self.learn_num_samples, self.make_experience_num_samples) * self.world_size
        avg_overall_throughput = num_effective_samples / (avg_overall_duration + 1e-12)

        overall_time_per_sample = divide(1, avg_overall_throughput)
        make_experience_time_per_sample = divide(avg_make_experience_duration, num_effective_samples)
        learn_time_per_sample = divide(avg_learn_duration, num_effective_samples)

        print_rank_0(
            f'Performance summary:\n' +
            f'Generate {self.make_experience_num_samples * self.world_size} samples, throughput: {avg_make_experience_throughput:.2f} samples/s, TFLOPS per GPU: {avg_make_experience_tflops:.2f}\n' +
            f'Train {self.learn_num_samples * self.world_size} samples, throughput: {avg_learn_throughput:.2f} samples/s, TFLOPS per GPU: {avg_learn_tflops:.2f}\n' +
            f'Overall throughput: {avg_overall_throughput:.2f} samples/s\n' +
            f'Overall time per sample: {overall_time_per_sample:.2f} s\n' +
            f'Make experience time per sample: {make_experience_time_per_sample:.2f} s, {make_experience_time_per_sample/overall_time_per_sample*100:.2f}%\n' +
            f'Learn time per sample: {learn_time_per_sample:.2f} s, {learn_time_per_sample/overall_time_per_sample*100:.2f}%')
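The generation term above sums an arithmetic series: decoding step t attends over input_len + t tokens, and each processed token costs roughly 2 FLOPs per model parameter (one multiply-add). A worked check of that estimate with made-up sizes (all numbers below are illustrative, not from this commit):

# Hypothetical sizes for illustration only.
actor_num_params = 7e9    # 7B-parameter actor
batch_size = 8
seq_len = 512             # prompt + generated tokens
num_actions = 128         # generated tokens
input_len = seq_len - num_actions

# Arithmetic series over decoding steps — the same formula the callback uses.
total_seq_len = (input_len + seq_len - 1) * num_actions / 2
make_experience_flop = actor_num_params * batch_size * total_seq_len * 2
print(f'{make_experience_flop / 1e12:.1f} TFLOP')  # ≈ 6415.4 TFLOP for this batch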
applications/Chat/coati/trainer/callbacks/save_checkpoint.py (new file, mode 100644)

import os

import torch.distributed as dist
from coati.trainer.strategies import ColossalAIStrategy, Strategy
from coati.trainer.utils import is_rank_0
from torch import nn
from torch.optim import Optimizer

from .base import Callback


class SaveCheckpoint(Callback):
    """
    Callback for saving coati checkpoints.

    Only saving the actor and critic models is supported.
    A typical layout of the saved checkpoint is:
        - checkpoint
            - episode_x
                - actor.pt
                - actor-optim-rank-0.pt
                - actor-optim-rank-1.pt
                - critic.pt
                - critic-optim-rank-0.pt
                - critic-optim-rank-1.pt
            - ...

    Args:
        path(str): the base path to save checkpoints to; the checkpoint is saved at `path/checkpoint`
        interval(int): the episode interval between checkpoints
        strategy(Strategy): the strategy used to train
        actor(nn.Module): the actor model
        critic(nn.Module): the critic model
        actor_optim(Optimizer): the optimizer of the actor
        critic_optim(Optimizer): the optimizer of the critic
    """

    def __init__(self,
                 path: str,
                 interval: int,
                 strategy: Strategy,
                 actor: nn.Module = None,
                 critic: nn.Module = None,
                 actor_optim: Optimizer = None,
                 critic_optim: Optimizer = None) -> None:
        super().__init__()
        self.path = os.path.join(path, 'checkpoint')
        self.interval = interval
        self.strategy = strategy
        self.model_dict = {'actor': [actor, actor_optim], 'critic': [critic, critic_optim]}

    def on_episode_end(self, episode: int) -> None:
        if (episode + 1) % self.interval != 0:
            return
        base_path = os.path.join(self.path, f'episode_{episode}')
        if not os.path.exists(base_path):
            os.makedirs(base_path)

        for model in self.model_dict.keys():

            # save model
            if self.model_dict[model][0] is None:
                # saving only optimizer states is meaningless, so it is skipped
                continue
            model_path = os.path.join(base_path, f'{model}.pt')
            self.strategy.save_model(model=self.model_dict[model][0], path=model_path, only_rank0=True)

            # save optimizer
            if self.model_dict[model][1] is None:
                continue
            only_rank0 = not isinstance(self.strategy, ColossalAIStrategy)
            rank = 0 if is_rank_0() else dist.get_rank()
            optim_path = os.path.join(base_path, f'{model}-optim-rank-{rank}.pt')
            self.strategy.save_optimizer(optimizer=self.model_dict[model][1], path=optim_path, only_rank0=only_rank0)
applications/Chat/coati/trainer/ppo.py (new file, mode 100644)

from typing import Any, Callable, Dict, List, Optional, Union

import torch
import torch.nn as nn
from coati.experience_maker import Experience, NaiveExperienceMaker
from coati.models.base import Actor, Critic
from coati.models.loss import GPTLMLoss, PolicyLoss, ValueLoss
from coati.replay_buffer import NaiveReplayBuffer
from torch import Tensor
from torch.optim import Optimizer
from torch.utils.data import DistributedSampler
from tqdm import tqdm
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

from colossalai.utils import get_current_device

from .base import Trainer
from .callbacks import Callback
from .strategies import Strategy
from .utils import is_rank_0, to_device


class PPOTrainer(Trainer):
    """
    Trainer for PPO algorithm.

    Args:
        strategy (Strategy): the strategy to use for training
        actor (Actor): the actor model in ppo algorithm
        critic (Critic): the critic model in ppo algorithm
        reward_model (nn.Module): the reward model in rlhf algorithm to make reward of sentences
        initial_model (Actor): the initial model in rlhf algorithm to generate reference logits to limit the update of the actor
        actor_optim (Optimizer): the optimizer to use for actor model
        critic_optim (Optimizer): the optimizer to use for critic model
        kl_coef (float, defaults to 0.1): the coefficient of kl divergence loss
        train_batch_size (int, defaults to 8): the batch size to use for training
        buffer_limit (int, defaults to 0): the max_size limitation of replay buffer
        buffer_cpu_offload (bool, defaults to True): whether to offload replay buffer to cpu
        eps_clip (float, defaults to 0.2): the clip coefficient of policy loss
        vf_coef (float, defaults to 1.0): the coefficient of value loss
        ptx_coef (float, defaults to 0.9): the coefficient of ptx loss
        value_clip (float, defaults to 0.4): the clip coefficient of value loss
        max_epochs (int, defaults to 1): the number of epochs of training process
        sample_replay_buffer (bool, defaults to False): whether to sample from replay buffer
        dataloader_pin_memory (bool, defaults to True): whether to pin memory for data loader
        offload_inference_models (bool, defaults to True): whether to offload inference models to cpu during training process
        callbacks (List[Callback], defaults to []): the callbacks to call during training process
        generate_kwargs (dict, optional): the kwargs to use while model generating
    """

    def __init__(self,
                 strategy: Strategy,
                 actor: Actor,
                 critic: Critic,
                 reward_model: nn.Module,
                 initial_model: Actor,
                 actor_optim: Optimizer,
                 critic_optim: Optimizer,
                 kl_coef: float = 0.1,
                 ptx_coef: float = 0.9,
                 train_batch_size: int = 8,
                 buffer_limit: int = 0,
                 buffer_cpu_offload: bool = True,
                 eps_clip: float = 0.2,
                 vf_coef: float = 1.0,
                 value_clip: float = 0.4,
                 max_epochs: int = 1,
                 sample_replay_buffer: bool = False,
                 dataloader_pin_memory: bool = True,
                 offload_inference_models: bool = True,
                 callbacks: List[Callback] = [],
                 **generate_kwargs) -> None:
        experience_maker = NaiveExperienceMaker(actor, critic, reward_model, initial_model, kl_coef)
        replay_buffer = NaiveReplayBuffer(train_batch_size, buffer_limit, buffer_cpu_offload)
        generate_kwargs = _set_default_generate_kwargs(strategy, generate_kwargs, actor)
        super().__init__(strategy, max_epochs, dataloader_pin_memory, callbacks, **generate_kwargs)
        self.experience_maker = experience_maker
        self.replay_buffer = replay_buffer
        self.sample_replay_buffer = sample_replay_buffer
        self.offload_inference_models = offload_inference_models

        self.actor = actor
        self.critic = critic

        self.actor_loss_fn = PolicyLoss(eps_clip)
        self.critic_loss_fn = ValueLoss(value_clip)
        self.vf_coef = vf_coef
        self.ptx_loss_fn = GPTLMLoss()
        self.ptx_coef = ptx_coef
        self.actor_optim = actor_optim
        self.critic_optim = critic_optim
        self.device = get_current_device()

    def _make_experience(self, inputs: Union[Tensor, Dict[str, Tensor]]) -> Experience:
        if isinstance(inputs, Tensor):
            return self.experience_maker.make_experience(inputs, **self.generate_kwargs)
        elif isinstance(inputs, dict):
            return self.experience_maker.make_experience(**inputs, **self.generate_kwargs)
        else:
            raise ValueError(f'Unsupported input type "{type(inputs)}"')

    def _learn(self):
        # replay buffer may be empty at first, so the dataloader is rebuilt for each learning phase
        if not self.sample_replay_buffer:
            dataloader = self.strategy.setup_dataloader(self.replay_buffer, self.dataloader_pin_memory)
        if self.sample_replay_buffer:
            pbar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0())
            for _ in pbar:
                experience = self.replay_buffer.sample()
                experience.to_device(self.device)
                metrics = self.training_step(experience)
                pbar.set_postfix(metrics)
        else:
            for epoch in range(self.max_epochs):
                self._on_learn_epoch_start(epoch)
                if isinstance(dataloader.sampler, DistributedSampler):
                    dataloader.sampler.set_epoch(epoch)
                pbar = tqdm(dataloader, desc=f'Train epoch [{epoch + 1}/{self.max_epochs}]', disable=not is_rank_0())
                for experience in pbar:
                    self._on_learn_batch_start()
                    experience.to_device(self.device)
                    metrics = self.training_step(experience)
                    self._on_learn_batch_end(metrics, experience)
                    pbar.set_postfix(metrics)
                self._on_learn_epoch_end(epoch)

    def fit(self,
            prompt_dataloader,
            pretrain_dataloader,
            num_episodes: int = 50000,
            max_timesteps: int = 500,
            update_timesteps: int = 5000) -> None:
        time = 0
        self.pretrain_dataloader = pretrain_dataloader
        self.prompt_dataloader = prompt_dataloader
        self._on_fit_start()
        for episode in range(num_episodes):
            self._on_episode_start(episode)
            for timestep in tqdm(range(max_timesteps),
                                 desc=f'Episode [{episode + 1}/{num_episodes}]',
                                 disable=not is_rank_0()):
                time += 1
                prompts = next(iter(self.prompt_dataloader))
                self._on_make_experience_start()
                if self.offload_inference_models:
                    # TODO(ver217): this may be controlled by strategy if they are prepared by strategy
                    self.experience_maker.initial_model.to(self.device)
                    self.experience_maker.reward_model.to(self.device)
                experience = self._make_experience(prompts)
                self._on_make_experience_end(experience)
                self.replay_buffer.append(experience)
                if time % update_timesteps == 0:
                    if self.offload_inference_models:
                        self.experience_maker.initial_model.to('cpu')
                        self.experience_maker.reward_model.to('cpu')
                    self._learn()
                    self.replay_buffer.clear()
            self._on_episode_end(episode)
        self._on_fit_end()

    def training_step(self, experience: Experience) -> Dict[str, float]:
        self.actor.train()
        self.critic.train()

        # policy loss
        num_actions = experience.action_mask.size(1)
        action_log_probs = self.actor(experience.sequences, num_actions, attention_mask=experience.attention_mask)
        actor_loss = self.actor_loss_fn(action_log_probs,
                                        experience.action_log_probs,
                                        experience.advantages,
                                        action_mask=experience.action_mask)

        # ptx loss
        if self.ptx_coef != 0:
            batch = next(iter(self.pretrain_dataloader))
            batch = to_device(batch, self.device)
            ptx_log_probs = self.actor.get_base_model()(batch['input_ids'],
                                                        attention_mask=batch['attention_mask'])['logits']
            ptx_loss = self.ptx_loss_fn(ptx_log_probs, batch['labels'])
            actor_loss = ptx_loss * self.ptx_coef + actor_loss * (1 - self.ptx_coef)

        self.strategy.backward(actor_loss, self.actor, self.actor_optim)
        self.strategy.optimizer_step(self.actor_optim)
        self.actor_optim.zero_grad()

        # value loss
        values = self.critic(experience.sequences,
                             action_mask=experience.action_mask,
                             attention_mask=experience.attention_mask)
        critic_loss = self.critic_loss_fn(values,
                                          experience.values,
                                          experience.reward,
                                          action_mask=experience.action_mask)
        critic_loss = critic_loss * self.vf_coef
        self.strategy.backward(critic_loss, self.critic, self.critic_optim)
        self.strategy.optimizer_step(self.critic_optim)
        self.critic_optim.zero_grad()

        return {'reward': experience.reward.mean().item()}


def _set_default_generate_kwargs(strategy: Strategy, generate_kwargs: dict, actor: Actor) -> dict:
    origin_model = strategy.unwrap_model(actor)
    new_kwargs = {**generate_kwargs}
    # use huggingface models method directly
    if 'prepare_inputs_fn' not in generate_kwargs and hasattr(origin_model, 'prepare_inputs_for_generation'):
        new_kwargs['prepare_inputs_fn'] = origin_model.prepare_inputs_for_generation
    if 'update_model_kwargs_fn' not in generate_kwargs and hasattr(origin_model, '_update_model_kwargs_for_generation'):
        new_kwargs['update_model_kwargs_fn'] = origin_model._update_model_kwargs_for_generation
    return new_kwargs
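PolicyLoss and ValueLoss come from coati.models.loss, which is not part of this diff. Assuming PolicyLoss follows the standard PPO clipped-surrogate objective (a reasonable but unverified reading of this trainer), a self-contained sketch of that formulation looks like:

import torch

def clipped_policy_loss(log_probs: torch.Tensor,
                        old_log_probs: torch.Tensor,
                        advantages: torch.Tensor,
                        eps_clip: float = 0.2) -> torch.Tensor:
    # Standard PPO surrogate: the ratio of new to old action probabilities,
    # clipped to [1 - eps, 1 + eps] so one update cannot move the policy too far.
    ratio = (log_probs - old_log_probs).exp()
    surr1 = ratio * advantages
    surr2 = ratio.clamp(1 - eps_clip, 1 + eps_clip) * advantages
    return -torch.min(surr1, surr2).mean()

# Toy tensors standing in for per-token action log-probs and advantages.
lp = torch.log(torch.tensor([0.30, 0.25]))
old_lp = torch.log(torch.tensor([0.25, 0.25]))
adv = torch.tensor([1.0, -0.5])
print(clipped_policy_loss(lp, old_lp, adv))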
applications/Chat/coati/trainer/rm.py (new file, mode 100644)

from datetime import datetime
from typing import List, Optional

import pandas as pd
import torch
import torch.distributed as dist
from torch.optim import Optimizer, lr_scheduler
from torch.utils.data import DataLoader, Dataset, DistributedSampler
from tqdm import tqdm
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

from .base import Trainer
from .callbacks import Callback
from .strategies import Strategy
from .utils import is_rank_0


class RewardModelTrainer(Trainer):
    """
    Trainer to use while training a reward model.

    Args:
        model (torch.nn.Module): the model to train
        strategy (Strategy): the strategy to use for training
        optim (Optimizer): the optimizer to use for training
        loss_fn (callable): the loss function to use for training
        train_dataloader (DataLoader): the dataloader to use for training
        valid_dataloader (DataLoader): the dataloader to use for validation
        eval_dataloader (DataLoader): the dataloader to use for evaluation
        max_epochs (int, defaults to 1): the number of epochs to train
        callbacks (List[Callback], defaults to []): the callbacks to call during training process
    """

    def __init__(self,
                 model,
                 strategy: Strategy,
                 optim: Optimizer,
                 loss_fn,
                 train_dataloader: DataLoader,
                 valid_dataloader: DataLoader,
                 eval_dataloader: DataLoader,
                 max_epochs: int = 1,
                 callbacks: List[Callback] = []) -> None:
        super().__init__(strategy, max_epochs, callbacks=callbacks)
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader
        self.eval_dataloader = eval_dataloader

        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optim
        self.scheduler = lr_scheduler.CosineAnnealingLR(self.optimizer, self.train_dataloader.__len__() // 100)

    def eval_acc(self, dataloader):
        dist = 0
        on = 0
        cnt = 0
        self.model.eval()
        with torch.no_grad():
            for chosen_ids, c_mask, reject_ids, r_mask in dataloader:
                chosen_ids = chosen_ids.squeeze(1).to(torch.cuda.current_device())
                c_mask = c_mask.squeeze(1).to(torch.cuda.current_device())
                reject_ids = reject_ids.squeeze(1).to(torch.cuda.current_device())
                r_mask = r_mask.squeeze(1).to(torch.cuda.current_device())
                chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
                reject_reward = self.model(reject_ids, attention_mask=r_mask)
                for i in range(len(chosen_reward)):
                    cnt += 1
                    if chosen_reward[i] > reject_reward[i]:
                        on += 1
                dist += (chosen_reward - reject_reward).mean().item()
            dist_mean = dist / len(dataloader)
            acc = on / cnt
        self.model.train()
        return dist_mean, acc

    def fit(self):
        time = datetime.now()
        epoch_bar = tqdm(range(self.max_epochs), desc='Train epoch', disable=not is_rank_0())
        for epoch in range(self.max_epochs):
            step_bar = tqdm(range(self.train_dataloader.__len__()),
                            desc='Train step of epoch %d' % epoch,
                            disable=not is_rank_0())
            # train
            self.model.train()
            cnt = 0
            acc = 0
            dist = 0
            for chosen_ids, c_mask, reject_ids, r_mask in self.train_dataloader:
                chosen_ids = chosen_ids.squeeze(1).to(torch.cuda.current_device())
                c_mask = c_mask.squeeze(1).to(torch.cuda.current_device())
                reject_ids = reject_ids.squeeze(1).to(torch.cuda.current_device())
                r_mask = r_mask.squeeze(1).to(torch.cuda.current_device())
                chosen_reward = self.model(chosen_ids, attention_mask=c_mask)
                reject_reward = self.model(reject_ids, attention_mask=r_mask)
                loss = self.loss_fn(chosen_reward, reject_reward)
                self.strategy.backward(loss, self.model, self.optimizer)
                self.strategy.optimizer_step(self.optimizer)
                self.optimizer.zero_grad()
                cnt += 1
                if cnt == 100:
                    self.scheduler.step()
                    dist, acc = self.eval_acc(self.valid_dataloader)
                    cnt = 0
                    if is_rank_0():
                        log = pd.DataFrame([[step_bar.n, loss.item(), dist, acc]],
                                           columns=['step', 'loss', 'dist', 'acc'])
                        log.to_csv('log_%s.csv' % time, mode='a', header=False, index=False)
                step_bar.update()
                step_bar.set_postfix({'dist': dist, 'acc': acc})

            # eval
            dist, acc = self.eval_acc(self.eval_dataloader)
            if is_rank_0():
                log = pd.DataFrame([[step_bar.n, loss.item(), dist, acc]], columns=['step', 'loss', 'dist', 'acc'])
                log.to_csv('log.csv', mode='a', header=False, index=False)
            epoch_bar.update()
            step_bar.set_postfix({'dist': dist, 'acc': acc})
            step_bar.close()
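loss_fn is supplied by the caller and is not shown in this diff. Reward models are commonly trained with a pairwise log-sigmoid ranking loss, so a minimal sketch under that assumption (the function name here is hypothetical, not coati's):

import torch
import torch.nn.functional as F

def pairwise_ranking_loss(chosen_reward: torch.Tensor, reject_reward: torch.Tensor) -> torch.Tensor:
    # Bradley-Terry style objective: push the chosen completion's scalar
    # reward above the rejected one; -log sigmoid(margin) per pair.
    return -F.logsigmoid(chosen_reward - reject_reward).mean()

chosen = torch.tensor([1.2, 0.3])
rejected = torch.tensor([0.4, 0.5])
print(pairwise_ranking_loss(chosen, rejected))  # small when chosen > rejected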
applications/Chat/coati/trainer/sft.py (new file, mode 100644)

import math
import time
from typing import List, Optional

import torch
import torch.distributed as dist
import wandb
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer import get_scheduler

from .base import Trainer
from .callbacks import Callback
from .strategies import ColossalAIStrategy, Strategy
from .utils import is_rank_0, to_device


class SFTTrainer(Trainer):
    """
    Trainer for supervised fine-tuning (SFT).

    Args:
        model (torch.nn.Module): the model to train
        strategy (Strategy): the strategy to use for training
        optim (Optimizer): the optimizer to use for training
        train_dataloader: the dataloader to use for training
        eval_dataloader: the dataloader to use for evaluation
        max_epochs (int, defaults to 2): the number of epochs to train
        accumulation_steps (int, defaults to 8): the number of gradient accumulation steps
        callbacks (List[Callback], defaults to []): the callbacks to call during training process
    """

    def __init__(self,
                 model,
                 strategy: Strategy,
                 optim: Optimizer,
                 train_dataloader: DataLoader,
                 eval_dataloader: DataLoader = None,
                 max_epochs: int = 2,
                 accumulation_steps: int = 8,
                 callbacks: List[Callback] = []) -> None:
        if accumulation_steps > 1 and isinstance(strategy, ColossalAIStrategy) and strategy.stage == 3:
            raise ValueError("Accumulation steps are not supported in stage 3 of ColossalAI")
        super().__init__(strategy, max_epochs, callbacks=callbacks)
        self.train_dataloader = train_dataloader
        self.eval_dataloader = eval_dataloader

        self.model = model
        self.optimizer = optim

        self.accumulation_steps = accumulation_steps
        num_update_steps_per_epoch = len(train_dataloader) // self.accumulation_steps
        max_steps = math.ceil(self.max_epochs * num_update_steps_per_epoch)

        self.scheduler = get_scheduler("cosine",
                                       self.optimizer,
                                       num_warmup_steps=math.ceil(max_steps * 0.03),
                                       num_training_steps=max_steps)

    def fit(self, logger, use_wandb: bool = False):
        if use_wandb:
            wandb.init(project="Coati", name=time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            wandb.watch(self.model)
        total_loss = 0
        # epoch_bar = tqdm(range(self.epochs), desc='Epochs', disable=not is_rank_0())
        step_bar = tqdm(range(len(self.train_dataloader) // self.accumulation_steps * self.max_epochs),
                        desc='steps',
                        disable=not is_rank_0())
        for epoch in range(self.max_epochs):

            # process_bar = tqdm(range(len(self.train_dataloader)), desc=f'Train process for{epoch}', disable=not is_rank_0())
            # train
            self.model.train()
            for batch_id, batch in enumerate(self.train_dataloader):

                batch = to_device(batch, torch.cuda.current_device())
                outputs = self.model(batch["input_ids"],
                                     attention_mask=batch["attention_mask"],
                                     labels=batch["labels"])

                loss = outputs.loss
                if loss >= 2.5 and is_rank_0():
                    logger.warning(f"batch_id: {batch_id}, abnormal loss: {loss}")

                loss = loss / self.accumulation_steps

                self.strategy.backward(loss, self.model, self.optimizer)

                total_loss += loss.item()

                # gradient accumulation
                if (batch_id + 1) % self.accumulation_steps == 0:
                    self.strategy.optimizer_step(self.optimizer)
                    self.optimizer.zero_grad()
                    self.scheduler.step()
                    if is_rank_0() and use_wandb:
                        wandb.log({
                            "loss": total_loss / self.accumulation_steps,
                            "lr": self.scheduler.get_last_lr()[0],
                            "epoch": epoch,
                            "batch_id": batch_id
                        })
                    total_loss = 0
                    step_bar.update()

                # if batch_id % log_interval == 0:
                #     logger.info(f'Train Epoch {epoch}/{self.epochs} Batch {batch_id} Rank {dist.get_rank()} loss {loss.item()}')
                #     wandb.log({"loss": loss.item()})

                # process_bar.update()

            # eval
            if self.eval_dataloader is not None:
                self.model.eval()
                with torch.no_grad():
                    loss_sum = 0
                    num_seen = 0
                    for batch in self.eval_dataloader:
                        batch = to_device(batch, torch.cuda.current_device())
                        outputs = self.model(batch["input_ids"],
                                             attention_mask=batch["attention_mask"],
                                             labels=batch["labels"])
                        loss = outputs.loss

                        loss_sum += loss.item()
                        num_seen += batch["input_ids"].size(0)
                    loss_mean = loss_sum / num_seen
                    if dist.get_rank() == 0:
                        logger.info(f'Eval Epoch {epoch}/{self.max_epochs} loss {loss_mean}')

            # epoch_bar.update()
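Note how the loss is divided by accumulation_steps before backward, so the gradients summed over a window match one large-batch step, and the optimizer and scheduler only advance once per window. The same pattern in a self-contained toy loop:

import torch

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accumulation_steps = 4

for batch_id in range(8):
    x, y = torch.randn(2, 4), torch.randn(2, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    # Scale so summing gradients over the window averages the micro-batch losses.
    (loss / accumulation_steps).backward()
    if (batch_id + 1) % accumulation_steps == 0:
        optimizer.step()        # one optimizer update per accumulation window
        optimizer.zero_grad()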
applications/Chat/coati/trainer/strategies/__init__.py (new file, mode 100644)

from .base import Strategy
from .colossalai import ColossalAIStrategy
from .ddp import DDPStrategy
from .naive import NaiveStrategy

__all__ = ['Strategy', 'NaiveStrategy', 'DDPStrategy', 'ColossalAIStrategy']
applications/Chat/coati/trainer/strategies/base.py (new file, mode 100644)

from abc import ABC, abstractmethod
from contextlib import nullcontext
from typing import Any, List, Optional, Tuple, Union

import torch
import torch.nn as nn
from coati.models.base import Actor, get_base_model
from coati.replay_buffer import ReplayBuffer
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

from .sampler import DistributedSampler

ModelOptimPair = Tuple[nn.Module, Optimizer]
ModelOrModelOptimPair = Union[nn.Module, ModelOptimPair]


class Strategy(ABC):
    """
    Base class for training strategies.
    """

    def __init__(self) -> None:
        super().__init__()
        self.setup_distributed()

    @abstractmethod
    def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: Optimizer, **kwargs) -> None:
        pass

    @abstractmethod
    def optimizer_step(self, optimizer: Optimizer, **kwargs) -> None:
        pass

    @abstractmethod
    def setup_distributed(self) -> None:
        pass

    @abstractmethod
    def setup_model(self, model: nn.Module) -> nn.Module:
        pass

    @abstractmethod
    def setup_optimizer(self, optimizer: Optimizer, model: nn.Module) -> Optimizer:
        pass

    @abstractmethod
    def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
        pass

    def model_init_context(self):
        return nullcontext()

    def prepare(self, *models_or_model_optim_pairs: ModelOrModelOptimPair
                ) -> Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]:
        """Prepare models or model-optimizer-pairs based on each strategy.

        Example::
            >>> # when fine-tuning actor and critic
            >>> (actor, actor_optim), (critic, critic_optim), reward_model, initial_model = strategy.prepare((actor, actor_optim), (critic, critic_optim), reward_model, initial_model)
            >>> # or when training reward model
            >>> (reward_model, reward_model_optim) = strategy.prepare((reward_model, reward_model_optim))
            >>> # or just inference
            >>> actor, critic = strategy.prepare(actor, critic)

        Returns:
            Union[List[ModelOrModelOptimPair], ModelOrModelOptimPair]: Models or model-optimizer-pairs in the original order.
        """

        def prepare_model(model: nn.Module):
            if isinstance(model, Actor):
                return Actor(self.setup_model(model.get_base_model()))
            return self.setup_model(model)

        rets = []
        for arg in models_or_model_optim_pairs:
            if isinstance(arg, tuple):
                assert len(arg) == 2, f'Expect (model, optimizer) pair, got a tuple with size "{len(arg)}"'
                model, optimizer = arg
                model = prepare_model(model)
                optimizer = self.setup_optimizer(optimizer, get_base_model(model))
                rets.append((model, optimizer))
            elif isinstance(arg, nn.Module):
                rets.append(prepare_model(arg))
            else:
                raise RuntimeError(f'Expect model or (model, optimizer) pair, got {type(arg)}')

        if len(rets) == 1:
            return rets[0]
        return rets

    @staticmethod
    def unwrap_model(model: nn.Module) -> nn.Module:
        """Get the unwrapped model from a wrapped model. Useful for getting the original huggingface model.
        For Actor, it will unwrap `actor.model`.

        Args:
            model (nn.Module): the model to unwrap

        Returns:
            nn.Module: the original model (usually a huggingface model)
        """
        return get_base_model(model)

    @abstractmethod
    def save_model(self, model: nn.Module, path: str, only_rank0: bool = True) -> None:
        pass

    @abstractmethod
    def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None:
        pass

    @abstractmethod
    def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
        pass

    @abstractmethod
    def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None:
        pass

    def setup_sampler(self, dataset) -> DistributedSampler:
        return DistributedSampler(dataset, 1, 0)

    @abstractmethod
    def save_pretrained(self,
                        model: nn.Module,
                        path: str,
                        only_rank0: bool = True,
                        tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None:
        pass
applications/Chat/coati/trainer/strategies/colossalai.py (new file, mode 100644)

import warnings
from typing import Optional, Union

import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
from coati.models.base import get_base_model
from torch.optim import Optimizer
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

import colossalai
from colossalai.logging import get_dist_logger
from colossalai.nn.optimizer import CPUAdam, HybridAdam
from colossalai.tensor import ProcessGroup, ShardSpec
from colossalai.utils import get_current_device
from colossalai.zero import ColoInitContext, ZeroDDP, zero_model_wrapper, zero_optim_wrapper

from .ddp import DDPStrategy

logger = get_dist_logger(__name__)


class ColossalAIStrategy(DDPStrategy):
    """
    The strategy for training with ColossalAI.

    Args:
        stage(int): The stage to use in ZeRO. Choose in (1, 2, 3)
        precision(str): The precision to use. Choose in ('fp32', 'fp16'). Stage 3 only supports fp16.
        seed(int): The seed for the random number generator.
        shard_init(bool): Whether to shard the model parameters during initialization. Only for ZeRO-3.
            This is not compatible with `from_pretrained()`. We temporarily disable this and will support it in the future.
        placement_policy(str): The placement policy for gemini. Choose in ('cpu', 'cuda').
            If it is 'cpu', parameters, gradients and optimizer states will be offloaded to CPU.
            If it is 'cuda', they will not be offloaded, which means max CUDA memory will be used. It is the fastest.
        pin_memory(bool): Whether to pin the memory for the data loader. Only for ZeRO-3.
        force_outputs_fp32(bool): Whether to force the outputs to be fp32. Only for ZeRO-3.
        search_range_mb(int): The search range in MB for the chunk size. Only for ZeRO-3.
        hidden_dim(optional, int): The hidden dimension for the gemini. Only for ZeRO-3.
        min_chunk_size_mb(float): The minimum chunk size in MB. Only for ZeRO-3.
        gpu_margin_mem_ratio(float): The margin memory ratio for the GPU. Only for ZeRO-3.
        reduce_bucket_size(int): The reduce bucket size in bytes. Only for ZeRO-1 and ZeRO-2.
        overlap_communication(bool): Whether to overlap communication and computation. Only for ZeRO-1 and ZeRO-2.
        initial_scale(float): The initial scale for the optimizer.
        growth_factor(float): The growth factor for the optimizer.
        backoff_factor(float): The backoff factor for the optimizer.
        growth_interval(int): The growth interval for the optimizer.
        hysteresis(int): The hysteresis for the optimizer.
        min_scale(float): The minimum scale for the optimizer.
        max_scale(float): The maximum scale for the optimizer.
        max_norm(float): The maximum norm for the optimizer.
        norm_type(float): The norm type for the optimizer.
    """

    def __init__(
            self,
            stage: int = 3,
            precision: str = 'fp16',
            seed: int = 42,
            shard_init: bool = False,    # only for stage 3
            placement_policy: str = 'cuda',
            pin_memory: bool = True,    # only for stage 3
            force_outputs_fp32: bool = False,    # only for stage 3
            scatter_after_inference: bool = False,    # only for stage 3
            search_range_mb: int = 32,    # only for stage 3
            hidden_dim: Optional[int] = None,    # only for stage 3
            min_chunk_size_mb: float = 32,    # only for stage 3
            gpu_margin_mem_ratio: float = 0.0,    # only for stage 3
            reduce_bucket_size: int = 12 * 1024**2,    # only for stage 1&2
            overlap_communication: bool = True,    # only for stage 1&2
            initial_scale: float = 2**16,
            growth_factor: float = 2,
            backoff_factor: float = 0.5,
            growth_interval: int = 1000,
            hysteresis: int = 2,
            min_scale: float = 1,
            max_scale: float = 2**32,
            max_norm: float = 0.0,
            norm_type: float = 2.0) -> None:
        super().__init__(seed)
        assert placement_policy in ('cpu', 'cuda'), f'Unsupported placement policy "{placement_policy}"'
        assert precision in ('fp32', 'fp16'), f'Unsupported precision "{precision}"'
        self.stage = stage
        # TODO(ver217): support shard_init when using from_pretrained()
        if shard_init:
            warnings.warn('Shard init is not supported with model.from_pretrained() yet. '
                          'Please load weights after strategy.prepare()')
        if stage == 3 and precision == 'fp32':
            warnings.warn('Stage 3 only supports fp16. Precision is set to fp16.')
            precision = 'fp16'
        self.precision = precision
        self.shard_init = shard_init
        self.gemini_config = dict(device=get_current_device(),
                                  placement_policy=placement_policy,
                                  pin_memory=pin_memory,
                                  force_outputs_fp32=force_outputs_fp32,
                                  strict_ddp_mode=shard_init,
                                  search_range_mb=search_range_mb,
                                  hidden_dim=hidden_dim,
                                  min_chunk_size_mb=min_chunk_size_mb,
                                  scatter_after_inference=scatter_after_inference)
        if stage == 3:
            self.zero_optim_config = dict(gpu_margin_mem_ratio=gpu_margin_mem_ratio)
        else:
            self.zero_optim_config = dict(reduce_bucket_size=reduce_bucket_size,
                                          overlap_communication=overlap_communication,
                                          cpu_offload=(placement_policy == 'cpu'))
        self.optim_kwargs = dict(initial_scale=initial_scale,
                                 growth_factor=growth_factor,
                                 backoff_factor=backoff_factor,
                                 growth_interval=growth_interval,
                                 hysteresis=hysteresis,
                                 min_scale=min_scale,
                                 max_scale=max_scale,
                                 max_norm=max_norm,
                                 norm_type=norm_type)

    def setup_distributed(self) -> None:
        colossalai.launch_from_torch({}, seed=self.seed)

    def model_init_context(self):
        if self.stage == 3:
            world_size = dist.get_world_size()
            shard_pg = ProcessGroup(tp_degree=world_size) if self.shard_init else None
            default_dist_spec = ShardSpec([-1], [world_size]) if self.shard_init else None
            return ColoInitContext(device=get_current_device(),
                                   dtype=torch.half,
                                   default_pg=shard_pg,
                                   default_dist_spec=default_dist_spec)
        return super().model_init_context()

    def setup_model(self, model: nn.Module) -> nn.Module:
        model = zero_model_wrapper(model, zero_stage=self.stage, gemini_config=self.gemini_config)
        if self.stage != 3 and self.precision == 'fp16':
            model = model.half().cuda()
        return model

    def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
        assert isinstance(optimizer, (CPUAdam, HybridAdam)), f'Unsupported optimizer {type(optimizer)}'
        return zero_optim_wrapper(model, optimizer, optim_config=self.zero_optim_config, **self.optim_kwargs)

    def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None:
        optimizer.backward(loss)

    def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None:
        optimizer.step()

    def save_model(self, model: nn.Module, path: str, only_rank0: bool = True) -> None:
        if only_rank0 and dist.get_rank() != 0 and self.stage != 3:
            return
        base_model = get_base_model(model)
        if self.stage == 3:
            assert isinstance(base_model, ZeroDDP)
            # for stage 3, the state_dict() method should be called on every rank
            state_dict = base_model.state_dict(only_rank_0=only_rank0)
        else:
            # only_rank0 is false or rank == 0
            state_dict = base_model.state_dict()
        if only_rank0 and dist.get_rank() != 0:
            return
        torch.save(state_dict, path)

    def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
        if only_rank0:
            raise RuntimeError('Optimizer states are sharded when using ColossalAIStrategy. '
                               'Saving on rank 0 only is not supported.')
        torch.save(optimizer.state_dict(), path)

    def unwrap_model(self, model: nn.Module) -> nn.Module:
        base_model: Union[nn.Module, ZeroDDP] = get_base_model(model)
        if self.stage == 3:
            assert isinstance(base_model, ZeroDDP)
            return base_model.module
        return base_model

    def save_pretrained(self,
                        model: nn.Module,
                        path: str,
                        only_rank0: bool = True,
                        tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None:
        if self.stage == 3:
            raise RuntimeError('ColossalAI strategy with stage-3 does not support save_pretrained() now')
        super().save_pretrained(model, path, only_rank0, tokenizer)
applications/Chat/coati/trainer/strategies/ddp.py (new file, mode 100644)

import os
import random
from typing import Optional

import numpy as np
import torch
import torch.distributed as dist
import torch.nn as nn
from coati.replay_buffer import ReplayBuffer
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

from .naive import NaiveStrategy
from .sampler import DistributedSampler


class DDPStrategy(NaiveStrategy):
    """
    Strategy for distributed training using torch.distributed.
    """

    def __init__(self, seed: int = 42) -> None:
        self.seed = seed
        super().__init__()

    def setup_distributed(self) -> None:
        try:
            rank = int(os.environ['RANK'])
            local_rank = int(os.environ['LOCAL_RANK'])
            world_size = int(os.environ['WORLD_SIZE'])
            host = os.environ['MASTER_ADDR']
            port = int(os.environ['MASTER_PORT'])
        except KeyError as e:
            raise RuntimeError(
                f"Could not find {e} in the torch environment, visit https://www.colossalai.org/ for more information on launching with torch"
            )
        dist.init_process_group('nccl', init_method=f'tcp://[{host}]:{port}', world_size=world_size, rank=rank)
        self.set_seed(self.seed)
        torch.cuda.set_device(local_rank)

    def set_seed(self, seed: int) -> None:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    def setup_model(self, model: nn.Module) -> nn.Module:
        device = torch.cuda.current_device()
        return DDP(model, device_ids=[device])

    def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
        # DDP only mode, replay buffers on each rank are different.
        # sampler = DistributedSampler(replay_buffer,
        #                              num_replicas=dist.get_world_size(),
        #                              rank=dist.get_rank(),
        #                              shuffle=True,
        #                              seed=self.seed,
        #                              drop_last=True)
        return DataLoader(
            replay_buffer,
            batch_size=replay_buffer.sample_batch_size,
            # sampler=sampler,
            shuffle=True,
            drop_last=True,
            pin_memory=pin_memory,
            collate_fn=replay_buffer.collate_fn)

    def save_model(self, model: nn.Module, path: str, only_rank0: bool = True) -> None:
        if only_rank0 and dist.get_rank() != 0:
            return
        super().save_model(model, path, only_rank0)

    def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
        if only_rank0 and dist.get_rank() != 0:
            return
        super().save_optimizer(optimizer, path, only_rank0)

    def setup_sampler(self, dataset) -> DistributedSampler:
        return DistributedSampler(dataset, dist.get_world_size(), dist.get_rank())

    def unwrap_model(self, model: nn.Module) -> nn.Module:
        base_model: DDP = super().unwrap_model(model)
        return base_model.module

    def save_pretrained(self,
                        model: nn.Module,
                        path: str,
                        only_rank0: bool = True,
                        tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None:
        if only_rank0 and dist.get_rank() != 0:
            return
        super().save_pretrained(model, path, only_rank0, tokenizer)
applications/Chat/coati/trainer/strategies/naive.py (new file, mode 100644)

from typing import Any, Optional

import torch
import torch.nn as nn
import torch.optim as optim
from coati.models.base import get_base_model
from coati.replay_buffer import ReplayBuffer
from torch.optim import Optimizer
from torch.utils.data import DataLoader
from transformers.modeling_utils import PreTrainedModel
from transformers.tokenization_utils_base import PreTrainedTokenizerBase

from .base import Strategy


class NaiveStrategy(Strategy):
    """
    Strategy for a single GPU. No parallelism is used.
    """

    def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None:
        loss.backward()

    def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None:
        optimizer.step()

    def setup_distributed(self) -> None:
        pass

    def setup_model(self, model: nn.Module) -> nn.Module:
        return model

    def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
        return optimizer

    def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
        return DataLoader(replay_buffer,
                          batch_size=replay_buffer.sample_batch_size,
                          shuffle=True,
                          drop_last=True,
                          pin_memory=pin_memory,
                          collate_fn=replay_buffer.collate_fn)

    def save_model(self, model: nn.Module, path: str, only_rank0: bool = True) -> None:
        base_model = get_base_model(model)
        state_dict = base_model.state_dict()
        torch.save(state_dict, path)

    def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None:
        base_model = get_base_model(model)
        state_dict = torch.load(path, map_location=map_location)
        base_model.load_state_dict(state_dict, strict=strict)

    def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
        torch.save(optimizer.state_dict(), path)

    def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None:
        state_dict = torch.load(path, map_location=map_location)
        optimizer.load_state_dict(state_dict)

    def save_pretrained(self,
                        model: nn.Module,
                        path: str,
                        only_rank0: bool = True,
                        tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None:
        unwrapped_model = self.unwrap_model(model)
        assert isinstance(unwrapped_model, PreTrainedModel)
        unwrapped_model.save_pretrained(path)
        if tokenizer is not None:
            tokenizer.save_pretrained(path)
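save_model and load_model above reduce to plain state-dict serialization once the base model is unwrapped. The round-trip they perform, in miniature (the /tmp path is just for illustration):

import torch

model = torch.nn.Linear(4, 2)
torch.save(model.state_dict(), '/tmp/model.pt')        # save_model, minus the unwrap step

restored = torch.nn.Linear(4, 2)
state_dict = torch.load('/tmp/model.pt', map_location='cpu')
restored.load_state_dict(state_dict, strict=True)      # load_model
assert torch.equal(model.weight, restored.weight)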
applications/Chat/coati/trainer/strategies/sampler.py (new file, mode 100644)

import math

import numpy as np


class DistributedSampler:

    def __init__(self, dataset, num_replicas: int, rank: int) -> None:
        self.dataset = dataset
        self.num_replicas = num_replicas
        self.rank = rank

        if len(self.dataset) % self.num_replicas != 0:
            self.num_samples = math.ceil(
                (len(self.dataset) - self.num_replicas) / self.num_replicas    # type: ignore[arg-type]
            )
        else:
            self.num_samples = math.ceil(len(self.dataset) / self.num_replicas)

        self.total_size = self.num_samples * self.num_replicas

        indices = list(range(len(self.dataset)))
        indices = indices[:self.total_size]
        assert len(indices) == self.total_size
        # subsample
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples
        self.indices = indices

    def sample(self, batch_size: int) -> list:
        sampled_indices = np.random.choice(self.indices, batch_size, replace=False)
        return [self.dataset[idx] for idx in sampled_indices]
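The constructor's strided slice indices[rank:total_size:num_replicas] is what partitions the dataset across ranks. A toy run showing the per-rank split for 10 samples over 2 replicas:

dataset = list(range(10))
num_replicas, total_size = 2, 10

for rank in range(num_replicas):
    # Same slicing as DistributedSampler.__init__: every rank takes a strided view.
    indices = list(range(len(dataset)))[:total_size][rank:total_size:num_replicas]
    print(rank, indices)
# 0 [0, 2, 4, 6, 8]
# 1 [1, 3, 5, 7, 9]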
applications/Chat/coati/trainer/utils.py (new file, mode 100644)

from typing import Any

import torch
import torch.distributed as dist
from torch.utils._pytree import tree_map


def is_rank_0() -> bool:
    return not dist.is_initialized() or dist.get_rank() == 0


def to_device(x: Any, device: torch.device) -> Any:

    def _to(t: Any):
        if isinstance(t, torch.Tensor):
            return t.to(device)
        return t

    return tree_map(_to, x)
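to_device leans on tree_map to move every tensor leaf of an arbitrarily nested batch while leaving other values untouched. A CPU-only check of the same pattern:

import torch
from torch.utils._pytree import tree_map

batch = {'input_ids': torch.zeros(2, 4), 'meta': {'lengths': torch.tensor([3, 4]), 'id': 'abc'}}
# Tensors are moved; the nesting and non-tensor leaves are preserved as-is.
moved = tree_map(lambda t: t.to('cpu') if isinstance(t, torch.Tensor) else t, batch)
print(type(moved), moved['meta']['id'])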
applications/Chat/coati/utils/__init__.py (new file, mode 100644)

from .tokenizer_utils import prepare_llama_tokenizer_and_embedding, smart_tokenizer_and_embedding_resize

__all__ = ['smart_tokenizer_and_embedding_resize', 'prepare_llama_tokenizer_and_embedding']
\ No newline at end of file