Commit 5ffc0d13 (unverified)
Migrate linter from `pylint` to `ruff` (#1665)
Authored Nov 20, 2023 by Simon Mo; committed by GitHub on Nov 20, 2023
Parent: 112627e8

Changes: 45 files in the commit; this page shows 5 changed files with 15 additions and 20 deletions (+15, -20).
Files shown on this page:

  vllm/model_executor/weight_utils.py      +4  -6
  vllm/transformers_utils/config.py        +1  -1
  vllm/transformers_utils/configs/mpt.py   +8  -8
  vllm/utils.py                            +1  -1
  vllm/worker/worker.py                    +1  -4
vllm/model_executor/weight_utils.py

@@ -131,11 +131,9 @@ def prepare_hf_model_weights(
 ) -> Tuple[str, List[str], bool]:
     # Download model weights from huggingface.
     is_local = os.path.isdir(model_name_or_path)
-    if use_safetensors:
-        allow_patterns = ["*.safetensors"]
-    else:
-        # Some quantized models use .pt files for storing the weights.
-        allow_patterns = ["*.bin", "*.pt"]
+    # Some quantized models use .pt files for storing the weights.
+    allow_patterns = ["*.safetensors"
+                      ] if use_safetensors else ["*.bin", "*.pt"]
     if not is_local:
         # Use file lock to prevent multiple processes from
         # downloading the same model weights at the same time.
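The five-line if/else is folded into a single assignment with a conditional expression (the unusual wrapping of the list literal is the auto-formatter's). For context, a minimal sketch of how a pattern list like this is typically consumed downstream, assuming huggingface_hub's `snapshot_download` and its `allow_patterns` parameter; the helper below is illustrative, not vLLM's actual code:

```python
from huggingface_hub import snapshot_download


def download_weights(model_name_or_path: str, use_safetensors: bool) -> str:
    # Same selection as the diff above: prefer safetensors, otherwise fall
    # back to .bin/.pt files (some quantized models ship .pt weights).
    allow_patterns = (["*.safetensors"]
                      if use_safetensors else ["*.bin", "*.pt"])
    # snapshot_download fetches only the files matching allow_patterns and
    # returns the local directory of the downloaded snapshot.
    return snapshot_download(repo_id=model_name_or_path,
                             allow_patterns=allow_patterns)
```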
@@ -242,7 +240,7 @@ def hf_model_weights_iterator(
     elif use_safetensors:
         for st_file in hf_weights_files:
             with safe_open(st_file, framework="pt") as f:
-                for name in f.keys():
+                for name in f:
                     param = f.get_tensor(name)
                     yield name, param
     else:
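The only change in this hunk is the loop header: `for name in f.keys():` becomes `for name in f:`, the kind of simplification ruff's flake8-simplify rules (SIM118) flag on mappings. A minimal sketch of that rule on a plain dict; illustration only, since whether the shortcut applies to the safetensors handle depends on that object supporting iteration:

```python
# Illustrative dict standing in for a mapping of tensor names to tensors.
weights = {"layer.weight": [1.0, 2.0], "layer.bias": [0.5]}

# Flagged by SIM118: calling .keys() just to iterate.
for name in weights.keys():
    print(name, weights[name])

# Preferred form: iterating a mapping yields its keys directly.
for name in weights:
    print(name, weights[name])
```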
vllm/transformers_utils/config.py

@@ -2,7 +2,7 @@ from typing import Optional

 from transformers import AutoConfig, PretrainedConfig

-from vllm.transformers_utils.configs import *  # pylint: disable=wildcard-import
+from vllm.transformers_utils.configs import *

 _CONFIG_REGISTRY = {
     "aquila": AquilaConfig,
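Only the trailing `# pylint: disable=wildcard-import` comment is dropped; under ruff the corresponding findings are pyflakes' F403 (star import used) and F405 (name may come from a star import), which would typically be suppressed in project-level configuration rather than per line. The star import exists so that `_CONFIG_REGISTRY` can name the per-model config classes directly; a self-contained sketch of that pattern, using stand-in classes rather than vLLM's real ones:

```python
# Self-contained sketch of the pattern config.py relies on: a configs package
# re-exports per-model config classes, and a registry refers to them by name
# after a star import. The classes below are stand-ins, not vLLM's.

class AquilaConfig:   # in vLLM this is defined under transformers_utils/configs
    model_type = "aquila"

class MPTConfig:      # stand-in for the class defined in mpt.py below
    model_type = "mpt"

# Declaring __all__ in the package __init__ keeps `import *` explicit.
__all__ = ["AquilaConfig", "MPTConfig"]

# What config.py does after `from vllm.transformers_utils.configs import *`:
_CONFIG_REGISTRY = {
    "aquila": AquilaConfig,
    "mpt": MPTConfig,
}
```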
vllm/transformers_utils/configs/mpt.py

@@ -62,7 +62,6 @@ class MPTConfig(PretrainedConfig):
                  fc_type: str = 'torch',
                  verbose: Optional[int] = None,
                  **kwargs: Any):
-        # pylint: disable=line-too-long
         """The MPT configuration class.

         Args:
             d_model (int): The size of the embedding dimension of the model.
@@ -139,10 +138,10 @@ class MPTConfig(PretrainedConfig):
         self.init_config = init_config
         self.fc_type = fc_type
         if verbose is not None:
             warnings.warn(
                 DeprecationWarning(
-                    'verbose argument for MPTConfig is now ignored and will be removed. Use python_log_level instead.'
-                ))
+                    'verbose argument for MPTConfig is now ignored and will be removed. Use python_log_level instead.'),
+                stacklevel=2)
         if 'name' in kwargs:
             del kwargs['name']
         if 'loss_fn' in kwargs:
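This hunk, and the two below it, add `stacklevel=2` to `warnings.warn`, the fix that checks like flake8-bugbear's B028 (no explicit stacklevel) ask for under ruff. With `stacklevel=2` the warning is attributed to the caller of the configuration code rather than to the `warn()` call site, which is usually the line a user can actually change. A minimal sketch with hypothetical function names:

```python
import warnings


def configure(verbose=None):
    if verbose is not None:
        # stacklevel=2 makes the warning point at configure()'s caller.
        warnings.warn(
            DeprecationWarning(
                "verbose is ignored; use python_log_level instead."),
            stacklevel=2)


def user_code():
    configure(verbose=1)  # the warning is reported against this line


user_code()
```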
@@ -150,8 +149,8 @@ class MPTConfig(PretrainedConfig):
         if self.attn_config.get('alibi', False):
             self.learned_pos_emb = False
             warnings.warn(
-                f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`'
-            )
+                f'alibi is turned on, setting `learned_pos_emb` to {self.learned_pos_emb}`',
+                stacklevel=2)
         super().__init__(**kwargs)
         self._validate_config()
@@ -211,7 +210,8 @@ class MPTConfig(PretrainedConfig):
             )
         if not self.learned_pos_emb and (not self.attn_config['alibi']):
             warnings.warn(
-                'Positional information not being provided to the model.')
+                'Positional information not being provided to the model.',
+                stacklevel=2)
         if self.fc_type == 'te' or self.ffn_config['ffn_type'] == 'te_ln_mlp':
             try:
                 # pylint: disable=import-outside-toplevel
vllm/utils.py

@@ -30,7 +30,7 @@ class Counter:
 def get_max_shared_memory_bytes(gpu: int = 0) -> int:
     """Returns the maximum shared memory per thread block in bytes."""
     # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html
-    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97  # pylint: disable=invalid-name
+    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97
     max_shared_mem = cuda_utils.get_device_attribute(
         cudaDevAttrMaxSharedMemoryPerBlockOptin, gpu)
     return int(max_shared_mem)
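Only the `# pylint: disable=invalid-name` comment is dropped; ruff's default rule set does not object to this CUDA enum-style constant name. For reference, the `cuda_utils.get_device_attribute` helper presumably wraps a CUDA runtime query along these lines; the ctypes version below is a sketch under the assumption that `libcudart.so` is loadable, not how vLLM's compiled extension actually does it:

```python
import ctypes


def max_shared_memory_per_block_optin(gpu: int = 0) -> int:
    cudart = ctypes.CDLL("libcudart.so")
    value = ctypes.c_int()
    # C signature: cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr,
    #                                      int device);
    # 97 == cudaDevAttrMaxSharedMemoryPerBlockOptin (see the diff above).
    err = cudart.cudaDeviceGetAttribute(ctypes.byref(value), 97, gpu)
    if err != 0:
        raise RuntimeError(f"cudaDeviceGetAttribute failed with error {err}")
    return value.value


if __name__ == "__main__":
    print(max_shared_memory_per_block_optin(0))
```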
vllm/worker/worker.py

@@ -350,10 +350,7 @@ class Worker:
             self.cache_engine.copy(blocks_to_copy)
             issued_cache_op = True

-        if issued_cache_op:
-            cache_events = self.cache_events
-        else:
-            cache_events = None
+        cache_events = self.cache_events if issued_cache_op else None

         # If there is no input, we don't need to execute the model.
         if not seq_group_metadata_list:
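As in weight_utils.py, a four-line if/else that only selects a value is folded into one conditional expression, the kind of rewrite flake8-simplify's SIM108 rule suggests. A tiny sketch of the equivalence, using local stand-ins for the worker's attributes:

```python
# Stand-ins mirroring the names in the diff above (not the real Worker state).
issued_cache_op = True
_cache_events = ["swap_in_done", "copy_done"]

# Before: one assignment per branch.
if issued_cache_op:
    cache_events = _cache_events
else:
    cache_events = None

# After: a single conditional expression with identical behaviour.
cache_events = _cache_events if issued_cache_op else None
assert cache_events == _cache_events
```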