Unverified commit 079bf3cb, authored by Hongxin Liu and committed by GitHub
Browse files

[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit

* [misc] run pre-commit

* [misc] remove useless configuration files

* [misc] ignore cuda for clang-format
parent 3c6b831c
......@@ -20,14 +20,14 @@ class NaiveExperienceBuffer(ExperienceBuffer):
def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:
    """Create an empty in-memory experience buffer.

    Args:
        sample_batch_size: Forwarded unchanged to the parent initializer.
        limit: Forwarded unchanged to the parent initializer.
        cpu_offload: When true, ``append`` moves each incoming experience to
            the CPU before it is split and stored.
    """
    super().__init__(sample_batch_size, limit)
    self.cpu_offload = cpu_offload
    # The CUDA device index is looked up once, at construction time.
    self.target_device = torch.device(f"cuda:{torch.cuda.current_device()}")
    # TODO(ver217): add prefetch
    self.items: List[BufferItem] = []
@torch.no_grad()
def append(self, experience: Experience) -> None:
if self.cpu_offload:
experience.to_device(torch.device('cpu'))
experience.to_device(torch.device("cpu"))
items = split_experience_batch(experience)
self.items.extend(items)
if self.limit > 0:
......
......@@ -21,6 +21,7 @@ class BufferItem:
"A" is the number of actions.
"""
sequences: torch.Tensor
action_log_probs: torch.Tensor
values: torch.Tensor
......@@ -33,8 +34,7 @@ class BufferItem:
def split_experience_batch(experience: Experience) -> List[BufferItem]:
batch_size = experience.sequences.size(0)
batch_kwargs = [{} for _ in range(batch_size)]
keys = ('sequences', 'action_log_probs', 'values',
'reward', 'advantages', 'attention_mask', 'action_mask')
keys = ("sequences", "action_log_probs", "values", "reward", "advantages", "attention_mask", "action_mask")
for key in keys:
value = getattr(experience, key)
if isinstance(value, torch.Tensor):
......@@ -49,22 +49,21 @@ def split_experience_batch(experience: Experience) -> List[BufferItem]:
return items
def _zero_pad_sequences(sequences: List[torch.Tensor], side: str = "left") -> torch.Tensor:
    """Zero-pad tensors to a common length and stack them into one batch.

    Args:
        sequences: Tensors to batch; their lengths are compared along dim 0.
        side: ``"left"`` puts the padding in front of each tensor,
            ``"right"`` puts it after.

    Returns:
        The padded tensors stacked along a new leading dimension.
    """
    assert side in ("left", "right")
    max_len = max(seq.size(0) for seq in sequences)
    padded_sequences = []
    for seq in sequences:
        pad_len = max_len - seq.size(0)
        # F.pad takes (left, right) amounts for the LAST dimension, while the
        # length above is read from dim 0 -- assumes 1-D inputs, TODO confirm.
        padding = (pad_len, 0) if side == "left" else (0, pad_len)
        padded_sequences.append(F.pad(seq, padding))
    return torch.stack(padded_sequences, dim=0)
def make_experience_batch(items: List[BufferItem]) -> Experience:
kwargs = {}
to_pad_keys = set(('action_log_probs', 'action_mask'))
keys = ('sequences', 'action_log_probs', 'values',
'reward', 'advantages', 'attention_mask', 'action_mask')
to_pad_keys = set(("action_log_probs", "action_mask"))
keys = ("sequences", "action_log_probs", "values", "reward", "advantages", "attention_mask", "action_mask")
for key in keys:
vals = [getattr(item, key) for item in items]
if key in to_pad_keys:
......
from .base import Experience, ExperienceMaker
from .naive import NaiveExperienceMaker
__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker']
__all__ = ["Experience", "ExperienceMaker", "NaiveExperienceMaker"]
......@@ -24,6 +24,7 @@ class Experience:
"A" is the number of actions.
"""
sequences: torch.Tensor
action_log_probs: torch.Tensor
values: torch.Tensor
......@@ -58,13 +59,9 @@ class Experience:
class ExperienceMaker(ABC):
def __init__(self,
actor: Actor,
critic: nn.Module,
reward_model: nn.Module,
initial_model: Actor,
kl_coef: float = 0.1) -> None:
def __init__(
self, actor: Actor, critic: nn.Module, reward_model: nn.Module, initial_model: Actor, kl_coef: float = 0.1
) -> None:
super().__init__()
self.actor = actor
self.critic = critic
......
......@@ -23,13 +23,12 @@ class NaiveExperienceMaker(ExperienceMaker):
# calculate auxiliary tensors
attention_mask = None
pad_token_id = generate_kwargs.get('pad_token_id', None)
pad_token_id = generate_kwargs.get("pad_token_id", None)
if pad_token_id is not None:
attention_mask = sequences.not_equal(pad_token_id)\
.to(dtype=torch.long, device=sequences.device)
attention_mask = sequences.not_equal(pad_token_id).to(dtype=torch.long, device=sequences.device)
input_len = input_ids.size(1)
eos_token_id = generate_kwargs.get('eos_token_id', None)
eos_token_id = generate_kwargs.get("eos_token_id", None)
if eos_token_id is None:
action_mask = torch.ones_like(sequences, dtype=torch.bool)
else:
......@@ -38,7 +37,7 @@ class NaiveExperienceMaker(ExperienceMaker):
action_mask = F.pad(action_mask, (1 + input_len, -1), value=True) # include eos token and input
action_mask[:, :input_len] = False
action_mask = action_mask[:, 1:]
action_mask = action_mask[:, -(sequences.size(1) - input_len):]
action_mask = action_mask[:, -(sequences.size(1) - input_len) :]
num_actions = action_mask.size(1)
actor_output = self.actor(sequences, attention_mask)
......
from .wrapper import convert_to_xformer_model, recover_from_xformer_model
__all__ = [
'convert_to_xformer_model',
'recover_from_xformer_model',
"convert_to_xformer_model",
"recover_from_xformer_model",
]
......@@ -21,11 +21,12 @@ class XOPTAttention(OPTAttention):
output_attentions: bool = False,
) -> Tuple[Tensor, Optional[Tensor], Optional[Tuple[Tensor]]]:
if not self.training:
return super().forward(hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask,
output_attentions)
return super().forward(
hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask, output_attentions
)
"""Input shape: Batch x Time x Channel"""
assert layer_head_mask is None, 'Xformers attention does not support layer_head_mask'
assert not output_attentions, 'Xformers attention does not support output_attentions'
assert layer_head_mask is None, "Xformers attention does not support layer_head_mask"
assert not output_attentions, "Xformers attention does not support output_attentions"
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
......@@ -69,12 +70,14 @@ class XOPTAttention(OPTAttention):
key_states = key_states.transpose(1, 2)
value_states = value_states.transpose(1, 2)
attn_output = xops.memory_efficient_attention(query_states,
attn_output = xops.memory_efficient_attention(
query_states,
key_states,
value_states,
attn_bias=xops.LowerTriangularMask(),
p=self.dropout if self.training else 0.0,
scale=self.scaling)
scale=self.scaling,
)
# Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
# partitioned across GPUs when using tensor-parallelism.
......
......@@ -3,6 +3,13 @@ from .lora import LoRAModule, convert_to_lora_module
from .loss import LogExpLoss, LogSigLoss, PolicyLoss, ValueLoss
__all__ = [
'Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'LogSigLoss', 'LogExpLoss',
'LoRAModule', 'convert_to_lora_module'
"Actor",
"Critic",
"RewardModel",
"PolicyLoss",
"ValueLoss",
"LogSigLoss",
"LogExpLoss",
"LoRAModule",
"convert_to_lora_module",
]
......@@ -18,9 +18,10 @@ def get_base_model(model: Union[Actor, Critic, RewardModel]) -> nn.Module:
Returns:
nn.Module: the base model
"""
assert isinstance(model, (Actor, Critic, RewardModel)), \
f'Expect Actor, Critic or RewardModel, got {type(model)}, use unwrap_model first.'
assert isinstance(
model, (Actor, Critic, RewardModel)
), f"Expect Actor, Critic or RewardModel, got {type(model)}, use unwrap_model first."
return model.model
__all__ = ['Actor', 'Critic', 'RewardModel', 'get_base_model']
__all__ = ["Actor", "Critic", "RewardModel", "get_base_model"]
......@@ -16,7 +16,7 @@ class Actor(LoRAModule):
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = "none") -> None:
    """Wrap ``model`` and run the LoRA conversion.

    Args:
        model: Module stored as ``self.model``.
        lora_rank: Forwarded to the parent initializer.
        lora_train_bias: Forwarded to the parent initializer.
    """
    super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
    self.model = model
    # Called only after self.model is assigned -- presumably the conversion
    # operates on the wrapped model; verify against LoRAModule.
    self.convert_to_lora()
......@@ -27,7 +27,6 @@ class Actor(LoRAModule):
attention_mask: Optional[torch.Tensor] = None,
**model_kwargs, # HACK: `generate` method may pass more kwargs
) -> torch.Tensor:
"""Returns model output.
"""
"""Returns model output."""
output = self.model(input_ids, attention_mask=attention_mask, **model_kwargs)
return output
......@@ -23,22 +23,23 @@ class Critic(LoRAModule):
model: nn.Module,
value_head: nn.Module,
lora_rank: int = 0,
lora_train_bias: str = 'none',
lora_train_bias: str = "none",
use_action_mask: bool = False,
) -> None:
super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
self.model = model
self.value_head = value_head
self.use_action_mask = use_action_mask
self.convert_to_lora()
def forward(self,
def forward(
self,
sequences: torch.LongTensor,
action_mask: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
attention_mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
outputs = self.model(sequences, attention_mask=attention_mask)
last_hidden_states = outputs['last_hidden_state']
last_hidden_states = outputs["last_hidden_state"]
values = self.value_head(last_hidden_states).squeeze(-1)
......
......@@ -17,11 +17,13 @@ class RewardModel(LoRAModule):
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
def __init__(
self,
model: nn.Module,
value_head: Optional[nn.Module] = None,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
lora_train_bias: str = "none",
) -> None:
super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
self.model = model
self.convert_to_lora()
......@@ -35,7 +37,7 @@ class RewardModel(LoRAModule):
def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
    """Compute one reward value per input sequence.

    Args:
        sequences: Input passed positionally to the wrapped model.
        attention_mask: Optional mask forwarded unchanged to the wrapped model.

    Returns:
        The value-head output with the last position along dim 1 dropped,
        averaged over dim 1, then squeezed on dim 1.
    """
    outputs = self.model(sequences, attention_mask=attention_mask)
    last_hidden_states = outputs["last_hidden_state"]
    # Drop the final position before averaging.
    # Assumes value_head emits a trailing size-1 dim, i.e. (B, S, 1) -- TODO confirm.
    values = self.value_head(last_hidden_states)[:, :-1]
    value = values.mean(dim=1).squeeze(1)  # ensure shape is (B)
    return value
......@@ -2,4 +2,4 @@ from .bloom_actor import BLOOMActor
from .bloom_critic import BLOOMCritic
from .bloom_rm import BLOOMRM
__all__ = ['BLOOMActor', 'BLOOMCritic', 'BLOOMRM']
__all__ = ["BLOOMActor", "BLOOMCritic", "BLOOMRM"]
from typing import Optional
import torch
from transformers import BloomConfig, BloomForCausalLM, BloomModel
from transformers import BloomConfig, BloomForCausalLM
from ..base import Actor
......@@ -18,12 +17,14 @@ class BLOOMActor(Actor):
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
def __init__(
self,
pretrained: str = None,
config: Optional[BloomConfig] = None,
checkpoint: bool = False,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
lora_train_bias: str = "none",
) -> None:
if pretrained is not None:
model = BloomForCausalLM.from_pretrained(pretrained)
elif config is not None:
......
from typing import Optional
import torch
import torch.nn as nn
from transformers import BloomConfig, BloomForCausalLM, BloomModel
from transformers import BloomConfig, BloomModel
from ..base import Critic
......@@ -18,12 +17,14 @@ class BLOOMCritic(Critic):
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
def __init__(
self,
pretrained: str = None,
config: Optional[BloomConfig] = None,
lora_rank: int = 0,
lora_train_bias: str = 'none',
**kwargs) -> None:
lora_train_bias: str = "none",
**kwargs,
) -> None:
if pretrained is not None:
model = BloomModel.from_pretrained(pretrained)
elif config is not None:
......
from typing import Optional
import torch.nn as nn
from transformers import BloomConfig, BloomForCausalLM, BloomModel
from transformers import BloomConfig, BloomModel
from ..base import RewardModel
......@@ -17,11 +17,13 @@ class BLOOMRM(RewardModel):
lora_train_bias (str): LoRA bias training mode.
"""
def __init__(self,
def __init__(
self,
pretrained: str = None,
config: Optional[BloomConfig] = None,
lora_rank: int = 0,
lora_train_bias: str = 'none') -> None:
lora_train_bias: str = "none",
) -> None:
if pretrained is not None:
model = BloomModel.from_pretrained(pretrained)
elif config is not None:
......
from .chatglm_actor import ChatGLMActor
__all__ = ['ChatGLMActor']
\ No newline at end of file
__all__ = ["ChatGLMActor"]
from typing import Optional
import torch
from ..base import Actor
from .configuration_chatglm import ChatGLMConfig
from .modeling_chatglm import ChatGLMForConditionalGeneration
from ..base import Actor
class ChatGLMActor(Actor):
"""
......@@ -19,10 +17,9 @@ class ChatGLMActor(Actor):
do not support lora for now.
"""
def __init__(self,
pretrained: str = None,
config: Optional[ChatGLMConfig] = None,
checkpoint: bool = False) -> None:
def __init__(
self, pretrained: str = None, config: Optional[ChatGLMConfig] = None, checkpoint: bool = False
) -> None:
if pretrained is not None:
model = ChatGLMForConditionalGeneration.from_pretrained(pretrained)
elif config is not None:
......@@ -31,4 +28,4 @@ class ChatGLMActor(Actor):
model = ChatGLMForConditionalGeneration(ChatGLMConfig())
if checkpoint:
model.gradient_checkpointing_enable()
super().__init__(model, lora_rank=0, lora_train_bias='none')
super().__init__(model, lora_rank=0, lora_train_bias="none")
......@@ -2,15 +2,14 @@
This code is copied from https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py
"""
"""Tokenization classes for ChatGLM."""
from typing import List, Optional, Union
import os
from typing import Dict, List, Optional, Union
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.utils import logging, PaddingStrategy
from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
from typing import Dict
import sentencepiece as spm
import numpy as np
import sentencepiece as spm
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
from transformers.utils import PaddingStrategy, logging
logger = logging.get_logger(__name__)
......@@ -100,9 +99,7 @@ class SPTokenizer:
text = self._encode_whitespaces(text, max_len=self.max_blank_length)
return text
def encode(
self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
) -> List[int]:
def encode(self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True) -> List[int]:
"""
@param text: Text to encode.
@param linebreak: Whether to encode newline (\n) in text.
......@@ -136,9 +133,7 @@ class SPTokenizer:
text = self.postprocess(text)
return text
def tokenize(
self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
) -> List[str]:
def tokenize(self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True) -> List[str]:
"""
@param text: Text to encode.
@param linebreak: Whether to encode newline (\n) in text.
......@@ -185,16 +180,16 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
vocab_file,
do_lower_case=False,
remove_space=False,
bos_token='<sop>',
eos_token='<eop>',
end_token='</s>',
mask_token='[MASK]',
gmask_token='[gMASK]',
bos_token="<sop>",
eos_token="<eop>",
end_token="</s>",
mask_token="[MASK]",
gmask_token="[gMASK]",
padding_side="left",
pad_token="<pad>",
unk_token="<unk>",
num_image_tokens=20000,
**kwargs
**kwargs,
) -> None:
super().__init__(
do_lower_case=do_lower_case,
......@@ -208,7 +203,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
pad_token=pad_token,
unk_token=unk_token,
num_image_tokens=num_image_tokens,
**kwargs
**kwargs,
)
self.do_lower_case = do_lower_case
......@@ -243,11 +238,11 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
@property
def vocab_size(self):
    """Return the vocab size, read from ``self.sp_tokenizer.num_tokens``."""
    return self.sp_tokenizer.num_tokens
def get_vocab(self):
    """Return the vocab as a ``{token: id}`` dict.

    Ids ``0 .. vocab_size - 1`` are mapped through ``_convert_id_to_token``;
    the entries of ``added_tokens_encoder`` are then merged on top and win
    on key collisions.
    """
    vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
    vocab.update(self.added_tokens_encoder)
    return vocab
......@@ -264,7 +259,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
return outputs
def _tokenize(self, text, **kwargs):
""" Returns a tokenized string. """
"""Returns a tokenized string."""
text = self.preprocess_text(text)
seq = self.sp_tokenizer.tokenize(text)
......@@ -274,11 +269,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """Join ``tokens`` into a string by delegating to ``self.sp_tokenizer.decode_tokens``."""
    return self.sp_tokenizer.decode_tokens(tokens)
def _decode(
self,
token_ids: Union[int, List[int]],
**kwargs
) -> str:
def _decode(self, token_ids: Union[int, List[int]], **kwargs) -> str:
if isinstance(token_ids, int):
token_ids = [token_ids]
if len(token_ids) == 0:
......@@ -288,7 +279,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
return super()._decode(token_ids, **kwargs)
def _convert_token_to_id(self, token):
    """Convert a token (str) to an id by indexing ``self.sp_tokenizer``."""
    return self.sp_tokenizer[token]
def _convert_id_to_token(self, index):
......@@ -309,13 +300,11 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
`Tuple(str)`: Paths to the files saved.
"""
if os.path.isdir(save_directory):
vocab_file = os.path.join(
save_directory, self.vocab_files_names["vocab_file"]
)
vocab_file = os.path.join(save_directory, self.vocab_files_names["vocab_file"])
else:
vocab_file = save_directory
with open(self.vocab_file, 'rb') as fin:
with open(self.vocab_file, "rb") as fin:
proto_str = fin.read()
with open(vocab_file, "wb") as writer:
......@@ -343,7 +332,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
`List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
"""
gmask_id = self.sp_tokenizer[self.gmask_token]
eos_id = self.sp_tokenizer[self.eos_token]
self.sp_tokenizer[self.eos_token]
token_ids_0 = token_ids_0 + [gmask_id, self.sp_tokenizer[self.bos_token]]
if token_ids_1 is not None:
token_ids_0 = token_ids_0 + token_ids_1
......@@ -421,17 +410,23 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
mask_position = required_input.index(mask_token)
position_ids[context_length:] = mask_position
block_position_ids = np.concatenate(
[np.zeros(context_length, dtype=np.int64),
np.arange(1, seq_length - context_length + 1, dtype=np.int64)])
[
np.zeros(context_length, dtype=np.int64),
np.arange(1, seq_length - context_length + 1, dtype=np.int64),
]
)
encoded_inputs["position_ids"] = np.stack([position_ids, block_position_ids], axis=0)
if needs_to_be_padded:
difference = max_length - len(required_input)
if "attention_mask" in encoded_inputs:
encoded_inputs["attention_mask"] = np.pad(encoded_inputs["attention_mask"],
encoded_inputs["attention_mask"] = np.pad(
encoded_inputs["attention_mask"],
pad_width=[(0, 0), (difference, 0), (difference, 0)],
mode='constant', constant_values=True)
mode="constant",
constant_values=True,
)
if "token_type_ids" in encoded_inputs:
encoded_inputs["token_type_ids"] = [self.pad_token_type_id] * difference + encoded_inputs[
"token_type_ids"
......@@ -439,8 +434,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
if "special_tokens_mask" in encoded_inputs:
encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
if "position_ids" in encoded_inputs:
encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
pad_width=[(0, 0), (difference, 0)])
encoded_inputs["position_ids"] = np.pad(
encoded_inputs["position_ids"], pad_width=[(0, 0), (difference, 0)]
)
encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
return encoded_inputs
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment