Unverified Commit 5ffc0d13 authored by Simon Mo, committed by GitHub

Migrate linter from `pylint` to `ruff` (#1665)

parent 112627e8
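The diff below repeats one mechanical cleanup across the model files: the `# pylint: disable=...` comments are deleted, and the four-line if/else that picks the per-layer CUDA cache event is collapsed into a conditional expression. A minimal sketch of that pattern, pulled out into a hypothetical standalone helper for illustration (the real change is inlined in each model's forward loop):

from typing import List, Optional

import torch


def select_cache_event(cache_events: Optional[List["torch.cuda.Event"]],
                       layer_idx: int) -> Optional["torch.cuda.Event"]:
    """Return the cache event for one layer, or None when no events are given.

    Hypothetical helper: before this commit, each model spelled this out as a
    four-line if/else block; the diff replaces every occurrence with the
    single conditional expression below.
    """
    return None if cache_events is None else cache_events[layer_idx]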
@@ -18,7 +18,6 @@ _PARTITION_SIZE = 512
 class PagedAttention(nn.Module):
-    # pylint: disable=line-too-long
     """GPT-style multi-head PagedAttention.
     This class takes query, key, and value tensors as input. The input tensors
...
@@ -50,7 +50,7 @@ class AWQConfig(QuantizationConfig):
     def get_config_filenames() -> List[str]:
         return [
             "quant_config.json",  # E.g., casperhansen/vicuna-7b-v1.5-awq
-            "quantize_config.json",  # E.g., abhinavkulkarni/mosaicml-mpt-7b-instruct-w4-g128-awq  # pylint: disable=line-too-long
+            "quantize_config.json",  # E.g., abhinavkulkarni/mosaicml-mpt-7b-instruct-w4-g128-awq
         ]
     @classmethod
...
@@ -7,7 +7,7 @@ import torch.nn as nn
 from transformers import PretrainedConfig
 from vllm.config import ModelConfig
-from vllm.model_executor.models import *  # pylint: disable=wildcard-import
+from vllm.model_executor.models import *
 from vllm.model_executor.weight_utils import (get_quant_config,
                                               initialize_dummy_weights)
...
@@ -261,10 +261,7 @@ class AquilaModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.embed_tokens(input_ids)
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(
                 positions,
...
@@ -281,10 +281,7 @@ class BaiChuanModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
...
@@ -256,10 +256,7 @@ class BloomModel(nn.Module):
         hidden_states = self.word_embeddings(input_ids)
         hidden_states = self.word_embeddings_layernorm(hidden_states)
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 position_ids,
...
@@ -269,10 +269,7 @@ class GLMTransformer(nn.Module):
         cache_events: Optional[List[torch.cuda.Event]],
     ) -> torch.Tensor:
         for i in range(self.num_layers):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(
                 hidden_states=hidden_states,
...
@@ -353,10 +353,7 @@ class FalconModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.word_embeddings(input_ids)
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 positions,
...
@@ -206,10 +206,7 @@ class GPT2Model(nn.Module):
         hidden_states = inputs_embeds + position_embeds
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(hidden_states, kv_caches[i], input_metadata,
                                   cache_event)
...
@@ -225,10 +225,7 @@ class GPTBigCodeModel(nn.Module):
         hidden_states = inputs_embeds + position_embeds
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(hidden_states, kv_caches[i], input_metadata,
                                   cache_event)
...
@@ -147,10 +147,7 @@ class GPTJBlock(nn.Module):
         linear_method: Optional[LinearMethodBase] = None,
     ):
         super().__init__()
-        if config.n_inner is None:
-            inner_dim = 4 * config.n_embd
-        else:
-            inner_dim = config.n_inner
+        inner_dim = 4 * config.n_embd if config.n_inner is None else config.n_inner
         self.ln_1 = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
         self.attn = GPTJAttention(config, linear_method)
         self.mlp = GPTJMLP(inner_dim, config, linear_method)
@@ -205,10 +202,7 @@ class GPTJModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.wte(input_ids)
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 position_ids,
...
@@ -216,10 +216,7 @@ class GPTNeoXModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.embed_in(input_ids)
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(
                 position_ids,
...
@@ -213,10 +213,7 @@ class InternLMModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
...
@@ -253,10 +253,7 @@ class LlamaModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
...
@@ -248,10 +248,7 @@ class MistralModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
...
@@ -203,8 +203,8 @@ class MPTModel(nn.Module):
         self.norm_f = nn.LayerNorm(config.d_model)
         if config.no_bias:
             for module in self.modules():
-                if hasattr(module, "bias"):
-                    if isinstance(module.bias, nn.Parameter):
+                if hasattr(module, "bias") and isinstance(
+                        module.bias, nn.Parameter):
                     # Remove the bias term in Linear and LayerNorm.
                     module.register_parameter("bias", None)
@@ -218,10 +218,7 @@ class MPTModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.wte(input_ids)
         for i in range(len(self.blocks)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             block = self.blocks[i]
             hidden_states = block(
                 position_ids,
...
@@ -257,10 +257,7 @@ class OPTDecoder(nn.Module):
         hidden_states = inputs_embeds + pos_embeds
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(hidden_states, kv_caches[i], input_metadata,
                                   cache_event)
...
@@ -258,10 +258,7 @@ class PhiModel(nn.Module):
     ) -> SamplerOutput:
         hidden_states = self.embd(input_ids)
         for i in range(self.config.num_hidden_layers):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 positions,
...
@@ -213,10 +213,7 @@ class QWenModel(nn.Module):
         hidden_states = self.wte(input_ids)
         residual = None
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states, residual = layer(
                 positions,
...
@@ -249,10 +249,7 @@ class YiModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
...