[misc] update pre-commit and run all files (#4752)

* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format

[misc] update pre-commit and run all files (#4752)
* [misc] update pre-commit
* [misc] run pre-commit
* [misc] remove useless configuration files
* [misc] ignore cuda for clang-format
079bf3cb · Hongxin Liu · GitHub · 3c6b831c · 079bf3cb · 079bf3cb
Unverified Commit 079bf3cb authored Sep 19, 2023 by Hongxin Liu Committed by GitHub Sep 19, 2023
20 changed files
--- a/applications/Chat/coati/experience_buffer/base.py
+++ b/applications/Chat/coati/experience_buffer/base.py
@@ -7,9 +7,9 @@ from coati.experience_maker.base import Experience
 class ExperienceBuffer(ABC):
    """Experience buffer base class. It stores experience.
-     Args:
+    Args:
-         sample_batch_size (int): Batch size when sampling.
+        sample_batch_size (int): Batch size when sampling.
-         limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
+        limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
    """
    def __init__(self, sample_batch_size: int, limit: int = 0) -> None:

--- a/applications/Chat/coati/experience_buffer/naive.py
+++ b/applications/Chat/coati/experience_buffer/naive.py
@@ -11,23 +11,23 @@ from .utils import BufferItem, make_experience_batch, split_experience_batch
 class NaiveExperienceBuffer(ExperienceBuffer):
    """Naive experience buffer class. It stores experience.
-     Args:
+    Args:
-         sample_batch_size (int): Batch size when sampling.
+        sample_batch_size (int): Batch size when sampling.
-         limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
+        limit (int, optional): Limit of number of experience samples. A number <= 0 means unlimited. Defaults to 0.
-         cpu_offload (bool, optional): Whether to offload experience to cpu when sampling. Defaults to True.
+        cpu_offload (bool, optional): Whether to offload experience to cpu when sampling. Defaults to True.
    """
    def __init__(self, sample_batch_size: int, limit: int = 0, cpu_offload: bool = True) -> None:
        super().__init__(sample_batch_size, limit)
        self.cpu_offload = cpu_offload
-        self.target_device = torch.device(f'cuda:{torch.cuda.current_device()}')
+        self.target_device = torch.device(f"cuda:{torch.cuda.current_device()}")
        # TODO(ver217): add prefetch
        self.items: List[BufferItem] = []
    @torch.no_grad()
    def append(self, experience: Experience) -> None:
        if self.cpu_offload:
-            experience.to_device(torch.device('cpu'))
+            experience.to_device(torch.device("cpu"))
        items = split_experience_batch(experience)
        self.items.extend(items)
        if self.limit > 0:

--- a/applications/Chat/coati/experience_buffer/utils.py
+++ b/applications/Chat/coati/experience_buffer/utils.py
@@ -21,6 +21,7 @@ class BufferItem:
    "A" is the number of actions.
    """
    sequences: torch.Tensor
    action_log_probs: torch.Tensor
    values: torch.Tensor
@@ -33,8 +34,7 @@ class BufferItem:
 def split_experience_batch(experience: Experience) -> List[BufferItem]:
    batch_size = experience.sequences.size(0)
    batch_kwargs = [{} for _ in range(batch_size)]
-    keys = ('sequences', 'action_log_probs', 'values',
+    keys = ("sequences", "action_log_probs", "values", "reward", "advantages", "attention_mask", "action_mask")
-            'reward', 'advantages', 'attention_mask', 'action_mask')
    for key in keys:
        value = getattr(experience, key)
        if isinstance(value, torch.Tensor):
@@ -49,22 +49,21 @@ def split_experience_batch(experience: Experience) -> List[BufferItem]:
    return items
-def _zero_pad_sequences(sequences: List[torch.Tensor], side: str = 'left') -> torch.Tensor:
+def _zero_pad_sequences(sequences: List[torch.Tensor], side: str = "left") -> torch.Tensor:
-    assert side in ('left', 'right')
+    assert side in ("left", "right")
    max_len = max(seq.size(0) for seq in sequences)
    padded_sequences = []
    for seq in sequences:
        pad_len = max_len - seq.size(0)
-        padding = (pad_len, 0) if side == 'left' else (0, pad_len)
+        padding = (pad_len, 0) if side == "left" else (0, pad_len)
        padded_sequences.append(F.pad(seq, padding))
    return torch.stack(padded_sequences, dim=0)
 def make_experience_batch(items: List[BufferItem]) -> Experience:
    kwargs = {}
-    to_pad_keys = set(('action_log_probs', 'action_mask'))
+    to_pad_keys = set(("action_log_probs", "action_mask"))
-    keys = ('sequences', 'action_log_probs', 'values',
+    keys = ("sequences", "action_log_probs", "values", "reward", "advantages", "attention_mask", "action_mask")
-            'reward', 'advantages', 'attention_mask', 'action_mask')
    for key in keys:
        vals = [getattr(item, key) for item in items]
        if key in to_pad_keys:

--- a/applications/Chat/coati/experience_maker/__init__.py
+++ b/applications/Chat/coati/experience_maker/__init__.py
 from .base import Experience, ExperienceMaker
 from .naive import NaiveExperienceMaker
-__all__ = ['Experience', 'ExperienceMaker', 'NaiveExperienceMaker']
+__all__ = ["Experience", "ExperienceMaker", "NaiveExperienceMaker"]
--- a/applications/Chat/coati/experience_maker/base.py
+++ b/applications/Chat/coati/experience_maker/base.py
@@ -24,6 +24,7 @@ class Experience:
    "A" is the number of actions.
    """
    sequences: torch.Tensor
    action_log_probs: torch.Tensor
    values: torch.Tensor
@@ -58,13 +59,9 @@ class Experience:
 class ExperienceMaker(ABC):
+    def __init__(
-    def __init__(self,
+        self, actor: Actor, critic: nn.Module, reward_model: nn.Module, initial_model: Actor, kl_coef: float = 0.1
-                 actor: Actor,
+    ) -> None:
-                 critic: nn.Module,
-                 reward_model: nn.Module,
-                 initial_model: Actor,
-                 kl_coef: float = 0.1) -> None:
        super().__init__()
        self.actor = actor
        self.critic = critic

--- a/applications/Chat/coati/experience_maker/naive.py
+++ b/applications/Chat/coati/experience_maker/naive.py
@@ -23,22 +23,21 @@ class NaiveExperienceMaker(ExperienceMaker):
        # calculate auxiliary tensors
        attention_mask = None
-        pad_token_id = generate_kwargs.get('pad_token_id', None)
+        pad_token_id = generate_kwargs.get("pad_token_id", None)
        if pad_token_id is not None:
-            attention_mask = sequences.not_equal(pad_token_id)\
+            attention_mask = sequences.not_equal(pad_token_id).to(dtype=torch.long, device=sequences.device)
-                .to(dtype=torch.long, device=sequences.device)
        input_len = input_ids.size(1)
-        eos_token_id = generate_kwargs.get('eos_token_id', None)
+        eos_token_id = generate_kwargs.get("eos_token_id", None)
        if eos_token_id is None:
            action_mask = torch.ones_like(sequences, dtype=torch.bool)
        else:
            # left padding may be applied, only mask action
            action_mask = (sequences[:, input_len:] == eos_token_id).cumsum(dim=-1) == 0
-            action_mask = F.pad(action_mask, (1 + input_len, -1), value=True)    # include eos token and input
+            action_mask = F.pad(action_mask, (1 + input_len, -1), value=True)  # include eos token and input
        action_mask[:, :input_len] = False
        action_mask = action_mask[:, 1:]
-        action_mask = action_mask[:, -(sequences.size(1) - input_len):]
+        action_mask = action_mask[:, -(sequences.size(1) - input_len) :]
        num_actions = action_mask.size(1)
        actor_output = self.actor(sequences, attention_mask)

--- a/applications/Chat/coati/kernels/__init__.py
+++ b/applications/Chat/coati/kernels/__init__.py
 from .wrapper import convert_to_xformer_model, recover_from_xformer_model
 __all__ = [
-    'convert_to_xformer_model',
+    "convert_to_xformer_model",
-    'recover_from_xformer_model',
+    "recover_from_xformer_model",
 ]
--- a/applications/Chat/coati/kernels/opt_attn.py
+++ b/applications/Chat/coati/kernels/opt_attn.py
@@ -21,11 +21,12 @@ class XOPTAttention(OPTAttention):
        output_attentions: bool = False,
    ) -> Tuple[Tensor, Optional[Tensor], Optional[Tuple[Tensor]]]:
        if not self.training:
-            return super().forward(hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask,
+            return super().forward(
-                                   output_attentions)
+                hidden_states, key_value_states, past_key_value, attention_mask, layer_head_mask, output_attentions
+            )
        """Input shape: Batch x Time x Channel"""
-        assert layer_head_mask is None, 'Xformers attention does not support layer_head_mask'
+        assert layer_head_mask is None, "Xformers attention does not support layer_head_mask"
-        assert not output_attentions, 'Xformers attention does not support output_attentions'
+        assert not output_attentions, "Xformers attention does not support output_attentions"
        # if key_value_states are provided this layer is used as a cross-attention layer
        # for the decoder
@@ -69,12 +70,14 @@ class XOPTAttention(OPTAttention):
        key_states = key_states.transpose(1, 2)
        value_states = value_states.transpose(1, 2)
-        attn_output = xops.memory_efficient_attention(query_states,
+        attn_output = xops.memory_efficient_attention(
-                                                      key_states,
+            query_states,
-                                                      value_states,
+            key_states,
-                                                      attn_bias=xops.LowerTriangularMask(),
+            value_states,
-                                                      p=self.dropout if self.training else 0.0,
+            attn_bias=xops.LowerTriangularMask(),
-                                                      scale=self.scaling)
+            p=self.dropout if self.training else 0.0,
+            scale=self.scaling,
+        )
        # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
        # partitioned across GPUs when using tensor-parallelism.

--- a/applications/Chat/coati/models/__init__.py
+++ b/applications/Chat/coati/models/__init__.py
@@ -3,6 +3,13 @@ from .lora import LoRAModule, convert_to_lora_module
 from .loss import LogExpLoss, LogSigLoss, PolicyLoss, ValueLoss
 __all__ = [
-    'Actor', 'Critic', 'RewardModel', 'PolicyLoss', 'ValueLoss', 'LogSigLoss', 'LogExpLoss',
+    "Actor",
-    'LoRAModule', 'convert_to_lora_module'
+    "Critic",
+    "RewardModel",
+    "PolicyLoss",
+    "ValueLoss",
+    "LogSigLoss",
+    "LogExpLoss",
+    "LoRAModule",
+    "convert_to_lora_module",
 ]
--- a/applications/Chat/coati/models/base/__init__.py
+++ b/applications/Chat/coati/models/base/__init__.py
@@ -9,7 +9,7 @@ from .reward_model import RewardModel
 def get_base_model(model: Union[Actor, Critic, RewardModel]) -> nn.Module:
    """Get the base model of our wrapper classes.
-    For Actor, Critic and RewardModel, return ``model.model``, 
+    For Actor, Critic and RewardModel, return ``model.model``,
    it's usually a ``transformers.PreTrainedModel``.
    Args:
@@ -18,9 +18,10 @@ def get_base_model(model: Union[Actor, Critic, RewardModel]) -> nn.Module:
    Returns:
        nn.Module: the base model
    """
-    assert isinstance(model, (Actor, Critic, RewardModel)), \
+    assert isinstance(
-        f'Expect Actor, Critic or RewardModel, got {type(model)}, use unwrap_model first.'
+        model, (Actor, Critic, RewardModel)
+    ), f"Expect Actor, Critic or RewardModel, got {type(model)}, use unwrap_model first."
    return model.model
-__all__ = ['Actor', 'Critic', 'RewardModel', 'get_base_model']
+__all__ = ["Actor", "Critic", "RewardModel", "get_base_model"]
--- a/applications/Chat/coati/models/base/actor.py
+++ b/applications/Chat/coati/models/base/actor.py
@@ -16,18 +16,17 @@ class Actor(LoRAModule):
        lora_train_bias (str): LoRA bias training mode.
    """
-    def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = 'none') -> None:
+    def __init__(self, model: nn.Module, lora_rank: int = 0, lora_train_bias: str = "none") -> None:
        super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
        self.model = model
        self.convert_to_lora()
    def forward(
-            self,
+        self,
-            input_ids: torch.LongTensor,
+        input_ids: torch.LongTensor,
-            attention_mask: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
-            **model_kwargs,    # HACK: `generate` method may pass more kwargs
+        **model_kwargs,  # HACK: `generate` method may pass more kwargs
    ) -> torch.Tensor:
-        """Returns model output.
+        """Returns model output."""
-        """
        output = self.model(input_ids, attention_mask=attention_mask, **model_kwargs)
        return output
--- a/applications/Chat/coati/models/base/critic.py
+++ b/applications/Chat/coati/models/base/critic.py
@@ -23,22 +23,23 @@ class Critic(LoRAModule):
        model: nn.Module,
        value_head: nn.Module,
        lora_rank: int = 0,
-        lora_train_bias: str = 'none',
+        lora_train_bias: str = "none",
        use_action_mask: bool = False,
    ) -> None:
        super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
        self.model = model
        self.value_head = value_head
        self.use_action_mask = use_action_mask
        self.convert_to_lora()
-    def forward(self,
+    def forward(
-                sequences: torch.LongTensor,
+        self,
-                action_mask: Optional[torch.Tensor] = None,
+        sequences: torch.LongTensor,
-                attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
+        action_mask: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
        outputs = self.model(sequences, attention_mask=attention_mask)
-        last_hidden_states = outputs['last_hidden_state']
+        last_hidden_states = outputs["last_hidden_state"]
        values = self.value_head(last_hidden_states).squeeze(-1)

--- a/applications/Chat/coati/models/base/reward_model.py
+++ b/applications/Chat/coati/models/base/reward_model.py
@@ -17,11 +17,13 @@ class RewardModel(LoRAModule):
        lora_train_bias (str): LoRA bias training mode.
    """
-    def __init__(self,
+    def __init__(
-                 model: nn.Module,
+        self,
-                 value_head: Optional[nn.Module] = None,
+        model: nn.Module,
-                 lora_rank: int = 0,
+        value_head: Optional[nn.Module] = None,
-                 lora_train_bias: str = 'none') -> None:
+        lora_rank: int = 0,
+        lora_train_bias: str = "none",
+    ) -> None:
        super().__init__(lora_rank=lora_rank, lora_train_bias=lora_train_bias)
        self.model = model
        self.convert_to_lora()
@@ -35,7 +37,7 @@ class RewardModel(LoRAModule):
    def forward(self, sequences: torch.LongTensor, attention_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        outputs = self.model(sequences, attention_mask=attention_mask)
-        last_hidden_states = outputs['last_hidden_state']
+        last_hidden_states = outputs["last_hidden_state"]
        values = self.value_head(last_hidden_states)[:, :-1]
-        value = values.mean(dim=1).squeeze(1)    # ensure shape is (B)
+        value = values.mean(dim=1).squeeze(1)  # ensure shape is (B)
        return value
--- a/applications/Chat/coati/models/bloom/__init__.py
+++ b/applications/Chat/coati/models/bloom/__init__.py
@@ -2,4 +2,4 @@ from .bloom_actor import BLOOMActor
 from .bloom_critic import BLOOMCritic
 from .bloom_rm import BLOOMRM
-__all__ = ['BLOOMActor', 'BLOOMCritic', 'BLOOMRM']
+__all__ = ["BLOOMActor", "BLOOMCritic", "BLOOMRM"]
--- a/applications/Chat/coati/models/bloom/bloom_actor.py
+++ b/applications/Chat/coati/models/bloom/bloom_actor.py
 from typing import Optional
-import torch
+from transformers import BloomConfig, BloomForCausalLM
-from transformers import BloomConfig, BloomForCausalLM, BloomModel
 from ..base import Actor
@@ -18,12 +17,14 @@ class BLOOMActor(Actor):
        lora_train_bias (str): LoRA bias training mode.
    """
-    def __init__(self,
+    def __init__(
-                 pretrained: str = None,
+        self,
-                 config: Optional[BloomConfig] = None,
+        pretrained: str = None,
-                 checkpoint: bool = False,
+        config: Optional[BloomConfig] = None,
-                 lora_rank: int = 0,
+        checkpoint: bool = False,
-                 lora_train_bias: str = 'none') -> None:
+        lora_rank: int = 0,
+        lora_train_bias: str = "none",
+    ) -> None:
        if pretrained is not None:
            model = BloomForCausalLM.from_pretrained(pretrained)
        elif config is not None:

--- a/applications/Chat/coati/models/bloom/bloom_critic.py
+++ b/applications/Chat/coati/models/bloom/bloom_critic.py
 from typing import Optional
-import torch
 import torch.nn as nn
-from transformers import BloomConfig, BloomForCausalLM, BloomModel
+from transformers import BloomConfig, BloomModel
 from ..base import Critic
@@ -18,12 +17,14 @@ class BLOOMCritic(Critic):
        lora_train_bias (str): LoRA bias training mode.
    """
-    def __init__(self,
+    def __init__(
-                 pretrained: str = None,
+        self,
-                 config: Optional[BloomConfig] = None,
+        pretrained: str = None,
-                 lora_rank: int = 0,
+        config: Optional[BloomConfig] = None,
-                 lora_train_bias: str = 'none',
+        lora_rank: int = 0,
-                 **kwargs) -> None:
+        lora_train_bias: str = "none",
+        **kwargs,
+    ) -> None:
        if pretrained is not None:
            model = BloomModel.from_pretrained(pretrained)
        elif config is not None:

--- a/applications/Chat/coati/models/bloom/bloom_rm.py
+++ b/applications/Chat/coati/models/bloom/bloom_rm.py
 from typing import Optional
 import torch.nn as nn
-from transformers import BloomConfig, BloomForCausalLM, BloomModel
+from transformers import BloomConfig, BloomModel
 from ..base import RewardModel
@@ -17,11 +17,13 @@ class BLOOMRM(RewardModel):
        lora_train_bias (str): LoRA bias training mode.
    """
-    def __init__(self,
+    def __init__(
-                 pretrained: str = None,
+        self,
-                 config: Optional[BloomConfig] = None,
+        pretrained: str = None,
-                 lora_rank: int = 0,
+        config: Optional[BloomConfig] = None,
-                 lora_train_bias: str = 'none') -> None:
+        lora_rank: int = 0,
+        lora_train_bias: str = "none",
+    ) -> None:
        if pretrained is not None:
            model = BloomModel.from_pretrained(pretrained)
        elif config is not None:

--- a/applications/Chat/coati/models/chatglm/__init__.py
+++ b/applications/Chat/coati/models/chatglm/__init__.py
 from .chatglm_actor import ChatGLMActor
-__all__ = ['ChatGLMActor']
+__all__ = ["ChatGLMActor"]
\ No newline at end of file
--- a/applications/Chat/coati/models/chatglm/chatglm_actor.py
+++ b/applications/Chat/coati/models/chatglm/chatglm_actor.py
 from typing import Optional
-import torch
+from ..base import Actor
 from .configuration_chatglm import ChatGLMConfig
 from .modeling_chatglm import ChatGLMForConditionalGeneration
-from ..base import Actor
 class ChatGLMActor(Actor):
    """
@@ -19,10 +17,9 @@ class ChatGLMActor(Actor):
    do not support lora for now.
    """
-    def __init__(self,
+    def __init__(
-                 pretrained: str = None,
+        self, pretrained: str = None, config: Optional[ChatGLMConfig] = None, checkpoint: bool = False
-                 config: Optional[ChatGLMConfig] = None,
+    ) -> None:
-                 checkpoint: bool = False) -> None:
        if pretrained is not None:
            model = ChatGLMForConditionalGeneration.from_pretrained(pretrained)
        elif config is not None:
@@ -31,4 +28,4 @@ class ChatGLMActor(Actor):
            model = ChatGLMForConditionalGeneration(ChatGLMConfig())
        if checkpoint:
            model.gradient_checkpointing_enable()
-        super().__init__(model, lora_rank=0, lora_train_bias='none')
+        super().__init__(model, lora_rank=0, lora_train_bias="none")
--- a/applications/Chat/coati/models/chatglm/chatglm_tokenizer.py
+++ b/applications/Chat/coati/models/chatglm/chatglm_tokenizer.py
@@ -2,15 +2,14 @@
 This code is copied from https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py
 """
 """Tokenization classes for ChatGLM."""
-from typing import List, Optional, Union
 import os
+from typing import Dict, List, Optional, Union
-from transformers.tokenization_utils import PreTrainedTokenizer
-from transformers.utils import logging, PaddingStrategy
-from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
-from typing import Dict
-import sentencepiece as spm
 import numpy as np
+import sentencepiece as spm
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.tokenization_utils_base import BatchEncoding, EncodedInput
+from transformers.utils import PaddingStrategy, logging
 logger = logging.get_logger(__name__)
@@ -52,11 +51,11 @@ class TextTokenizer:
 class SPTokenizer:
    def __init__(
-            self,
+        self,
-            vocab_file,
+        vocab_file,
-            num_image_tokens=20000,
+        num_image_tokens=20000,
-            max_blank_length=80,
+        max_blank_length=80,
-            byte_fallback=True,
+        byte_fallback=True,
    ):
        assert vocab_file is not None
        self.vocab_file = vocab_file
@@ -100,9 +99,7 @@ class SPTokenizer:
            text = self._encode_whitespaces(text, max_len=self.max_blank_length)
        return text
-    def encode(
+    def encode(self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True) -> List[int]:
-            self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
-    ) -> List[int]:
        """
        @param text: Text to encode.
        @param linebreak: Whether to encode newline (\n) in text.
@@ -136,9 +133,7 @@ class SPTokenizer:
        text = self.postprocess(text)
        return text
-    def tokenize(
+    def tokenize(self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True) -> List[str]:
-            self, text: str, linebreak=True, whitespaces=True, add_dummy_prefix=True
-    ) -> List[str]:
        """
        @param text: Text to encode.
        @param linebreak: Whether to encode newline (\n) in text.
@@ -181,20 +176,20 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
    model_input_names = ["input_ids", "attention_mask", "position_ids"]
    def __init__(
-            self,
+        self,
-            vocab_file,
+        vocab_file,
-            do_lower_case=False,
+        do_lower_case=False,
-            remove_space=False,
+        remove_space=False,
-            bos_token='<sop>',
+        bos_token="<sop>",
-            eos_token='<eop>',
+        eos_token="<eop>",
-            end_token='</s>',
+        end_token="</s>",
-            mask_token='[MASK]',
+        mask_token="[MASK]",
-            gmask_token='[gMASK]',
+        gmask_token="[gMASK]",
-            padding_side="left",
+        padding_side="left",
-            pad_token="<pad>",
+        pad_token="<pad>",
-            unk_token="<unk>",
+        unk_token="<unk>",
-            num_image_tokens=20000,
+        num_image_tokens=20000,
-            **kwargs
+        **kwargs,
    ) -> None:
        super().__init__(
            do_lower_case=do_lower_case,
@@ -208,7 +203,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
            pad_token=pad_token,
            unk_token=unk_token,
            num_image_tokens=num_image_tokens,
-            **kwargs
+            **kwargs,
        )
        self.do_lower_case = do_lower_case
@@ -243,11 +238,11 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
    @property
    def vocab_size(self):
-        """ Returns vocab size """
+        """Returns vocab size"""
        return self.sp_tokenizer.num_tokens
    def get_vocab(self):
-        """ Returns vocab as a dict """
+        """Returns vocab as a dict"""
        vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab
@@ -264,7 +259,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
        return outputs
    def _tokenize(self, text, **kwargs):
-        """ Returns a tokenized string. """
+        """Returns a tokenized string."""
        text = self.preprocess_text(text)
        seq = self.sp_tokenizer.tokenize(text)
@@ -274,11 +269,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        return self.sp_tokenizer.decode_tokens(tokens)
-    def _decode(
+    def _decode(self, token_ids: Union[int, List[int]], **kwargs) -> str:
-            self,
-            token_ids: Union[int, List[int]],
-            **kwargs
-    ) -> str:
        if isinstance(token_ids, int):
            token_ids = [token_ids]
        if len(token_ids) == 0:
@@ -288,7 +279,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
        return super()._decode(token_ids, **kwargs)
    def _convert_token_to_id(self, token):
-        """ Converts a token (str) in an id using the vocab. """
+        """Converts a token (str) in an id using the vocab."""
        return self.sp_tokenizer[token]
    def _convert_id_to_token(self, index):
@@ -309,13 +300,11 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
-            vocab_file = os.path.join(
+            vocab_file = os.path.join(save_directory, self.vocab_files_names["vocab_file"])
-                save_directory, self.vocab_files_names["vocab_file"]
-            )
        else:
            vocab_file = save_directory
-        with open(self.vocab_file, 'rb') as fin:
+        with open(self.vocab_file, "rb") as fin:
            proto_str = fin.read()
        with open(vocab_file, "wb") as writer:
@@ -324,7 +313,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
        return (vocab_file,)
    def build_inputs_with_special_tokens(
-            self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
@@ -343,19 +332,19 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        gmask_id = self.sp_tokenizer[self.gmask_token]
-        eos_id = self.sp_tokenizer[self.eos_token]
+        self.sp_tokenizer[self.eos_token]
        token_ids_0 = token_ids_0 + [gmask_id, self.sp_tokenizer[self.bos_token]]
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1
        return token_ids_0
    def _pad(
-            self,
+        self,
-            encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
+        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
-            max_length: Optional[int] = None,
+        max_length: Optional[int] = None,
-            padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
+        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
-            pad_to_multiple_of: Optional[int] = None,
+        pad_to_multiple_of: Optional[int] = None,
-            return_attention_mask: Optional[bool] = None,
+        return_attention_mask: Optional[bool] = None,
    ) -> dict:
        """
        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
@@ -421,17 +410,23 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
                    mask_position = required_input.index(mask_token)
                    position_ids[context_length:] = mask_position
                block_position_ids = np.concatenate(
-                    [np.zeros(context_length, dtype=np.int64),
+                    [
-                     np.arange(1, seq_length - context_length + 1, dtype=np.int64)])
+                        np.zeros(context_length, dtype=np.int64),
+                        np.arange(1, seq_length - context_length + 1, dtype=np.int64),
+                    ]
+                )
                encoded_inputs["position_ids"] = np.stack([position_ids, block_position_ids], axis=0)
        if needs_to_be_padded:
            difference = max_length - len(required_input)
            if "attention_mask" in encoded_inputs:
-                encoded_inputs["attention_mask"] = np.pad(encoded_inputs["attention_mask"],
+                encoded_inputs["attention_mask"] = np.pad(
-                                                          pad_width=[(0, 0), (difference, 0), (difference, 0)],
+                    encoded_inputs["attention_mask"],
-                                                          mode='constant', constant_values=True)
+                    pad_width=[(0, 0), (difference, 0), (difference, 0)],
+                    mode="constant",
+                    constant_values=True,
+                )
            if "token_type_ids" in encoded_inputs:
                encoded_inputs["token_type_ids"] = [self.pad_token_type_id] * difference + encoded_inputs[
                    "token_type_ids"
@@ -439,8 +434,9 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
            if "special_tokens_mask" in encoded_inputs:
                encoded_inputs["special_tokens_mask"] = [1] * difference + encoded_inputs["special_tokens_mask"]
            if "position_ids" in encoded_inputs:
-                encoded_inputs["position_ids"] = np.pad(encoded_inputs["position_ids"],
+                encoded_inputs["position_ids"] = np.pad(
-                                                        pad_width=[(0, 0), (difference, 0)])
+                    encoded_inputs["position_ids"], pad_width=[(0, 0), (difference, 0)]
+                )
            encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
        return encoded_inputs
\ No newline at end of file