Unverified commit 5ffc0d13, authored by Simon Mo, committed by GitHub

Migrate linter from `pylint` to `ruff` (#1665)

parent 112627e8
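
Nearly every hunk below makes one of the same two mechanical changes: a `# pylint: disable=...` comment is dropped (ruff does not read pylint directives), or a four-line `if`/`else` assignment is collapsed into a single conditional expression, the form ruff's flake8-simplify check (SIM108) prefers when that rule set is enabled. As a rough sketch of the second rewrite, illustrative only and not code from this commit (the helper name `select_cache_event` is invented; the variable names simply mirror the model forward loops below):

from typing import List, Optional


def select_cache_event(cache_events: Optional[List[object]],
                       i: int) -> Optional[object]:
    # Before the migration: the four-line if/else assignment.
    if cache_events is None:
        cache_event = None
    else:
        cache_event = cache_events[i]

    # After the migration: the equivalent one-line conditional expression.
    cache_event = None if cache_events is None else cache_events[i]
    return cache_event

Both forms behave identically at runtime; the migration changes tooling and style, not semantics.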
@@ -18,7 +18,6 @@ _PARTITION_SIZE = 512
 class PagedAttention(nn.Module):
-    # pylint: disable=line-too-long
     """GPT-style multi-head PagedAttention.
     This class takes query, key, and value tensors as input. The input tensors
......
@@ -50,7 +50,7 @@ class AWQConfig(QuantizationConfig):
     def get_config_filenames() -> List[str]:
         return [
             "quant_config.json",  # E.g., casperhansen/vicuna-7b-v1.5-awq
-            "quantize_config.json",  # E.g., abhinavkulkarni/mosaicml-mpt-7b-instruct-w4-g128-awq  # pylint: disable=line-too-long
+            "quantize_config.json",  # E.g., abhinavkulkarni/mosaicml-mpt-7b-instruct-w4-g128-awq
         ]
     @classmethod
......
@@ -7,7 +7,7 @@ import torch.nn as nn
 from transformers import PretrainedConfig
 from vllm.config import ModelConfig
-from vllm.model_executor.models import *  # pylint: disable=wildcard-import
+from vllm.model_executor.models import *
 from vllm.model_executor.weight_utils import (get_quant_config,
                                               initialize_dummy_weights)
......
@@ -261,10 +261,7 @@ class AquilaModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.embed_tokens(input_ids)
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(
                 positions,
......
@@ -281,10 +281,7 @@ class BaiChuanModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
......
@@ -256,10 +256,7 @@ class BloomModel(nn.Module):
         hidden_states = self.word_embeddings(input_ids)
         hidden_states = self.word_embeddings_layernorm(hidden_states)
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 position_ids,
......
@@ -269,10 +269,7 @@ class GLMTransformer(nn.Module):
         cache_events: Optional[List[torch.cuda.Event]],
     ) -> torch.Tensor:
         for i in range(self.num_layers):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(
                 hidden_states=hidden_states,
......
@@ -353,10 +353,7 @@ class FalconModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.word_embeddings(input_ids)
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 positions,
......
@@ -206,10 +206,7 @@ class GPT2Model(nn.Module):
         hidden_states = inputs_embeds + position_embeds
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(hidden_states, kv_caches[i], input_metadata,
                                   cache_event)
......
@@ -225,10 +225,7 @@ class GPTBigCodeModel(nn.Module):
         hidden_states = inputs_embeds + position_embeds
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(hidden_states, kv_caches[i], input_metadata,
                                   cache_event)
......
@@ -147,10 +147,7 @@ class GPTJBlock(nn.Module):
         linear_method: Optional[LinearMethodBase] = None,
     ):
         super().__init__()
-        if config.n_inner is None:
-            inner_dim = 4 * config.n_embd
-        else:
-            inner_dim = config.n_inner
+        inner_dim = 4 * config.n_embd if config.n_inner is None else config.n_inner
         self.ln_1 = nn.LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)
         self.attn = GPTJAttention(config, linear_method)
         self.mlp = GPTJMLP(inner_dim, config, linear_method)
@@ -205,10 +202,7 @@ class GPTJModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.wte(input_ids)
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 position_ids,
......
@@ -216,10 +216,7 @@ class GPTNeoXModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.embed_in(input_ids)
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(
                 position_ids,
......
@@ -213,10 +213,7 @@ class InternLMModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
......
@@ -253,10 +253,7 @@ class LlamaModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
......
@@ -248,10 +248,7 @@ class MistralModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
......
@@ -203,10 +203,10 @@ class MPTModel(nn.Module):
         self.norm_f = nn.LayerNorm(config.d_model)
         if config.no_bias:
             for module in self.modules():
-                if hasattr(module, "bias"):
-                    if isinstance(module.bias, nn.Parameter):
-                        # Remove the bias term in Linear and LayerNorm.
-                        module.register_parameter("bias", None)
+                if hasattr(module, "bias") and isinstance(
+                        module.bias, nn.Parameter):
+                    # Remove the bias term in Linear and LayerNorm.
+                    module.register_parameter("bias", None)
     def forward(
         self,
@@ -218,10 +218,7 @@ class MPTModel(nn.Module):
     ) -> torch.Tensor:
         hidden_states = self.wte(input_ids)
         for i in range(len(self.blocks)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             block = self.blocks[i]
             hidden_states = block(
                 position_ids,
......
@@ -257,10 +257,7 @@ class OPTDecoder(nn.Module):
         hidden_states = inputs_embeds + pos_embeds
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states = layer(hidden_states, kv_caches[i], input_metadata,
                                   cache_event)
......
@@ -258,10 +258,7 @@ class PhiModel(nn.Module):
     ) -> SamplerOutput:
         hidden_states = self.embd(input_ids)
         for i in range(self.config.num_hidden_layers):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states = layer(
                 positions,
......
@@ -213,10 +213,7 @@ class QWenModel(nn.Module):
         hidden_states = self.wte(input_ids)
         residual = None
         for i in range(len(self.h)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.h[i]
             hidden_states, residual = layer(
                 positions,
......
@@ -249,10 +249,7 @@ class YiModel(nn.Module):
         hidden_states = self.embed_tokens(input_ids)
         residual = None
         for i in range(len(self.layers)):
-            if cache_events is None:
-                cache_event = None
-            else:
-                cache_event = cache_events[i]
+            cache_event = None if cache_events is None else cache_events[i]
             layer = self.layers[i]
             hidden_states, residual = layer(
                 positions,
......