暂时去掉 profiling 标志位（set_profilling），避免影响其他模型

6f5d76dc · 王敏 · a0d02d42 · 6f5d76dc
Commit 6f5d76dc authored Sep 04, 2025 by 王敏
Show whitespace changes
Inline Side-by-side

Showing with 3 additions and 5 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +3 -5

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2051,8 +2051,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
                input_ids = None
                inputs_embeds = self.inputs_embeds[:num_tokens]
            else:
-                #self.input_ids[:num_tokens] = torch.randint(0, 120000, (num_tokens,), dtype=torch.int32)
-                #self.input_ids[:num_tokens] = torch.arange(num_tokens, dtype=torch.int32, device=self.input_ids.device)
                input_ids = self.input_ids[:num_tokens]
                inputs_embeds = None
            if self.uses_mrope:
@@ -2226,8 +2224,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):

    def profile_run(self) -> None:
        # set profiling flag to avoid torch compile
-        set_profilling(True)
-        self._sync_device()
+        #set_profilling(True)
+        #self._sync_device()

        # Profile with multimodal encoder & encoder cache.
        # TODO: handle encoder-decoder models once we support them.
@@ -2312,7 +2310,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        del hidden_states, output
        self.encoder_cache.clear()
        gc.collect()
-        set_profilling(False)
+        #set_profilling(False)

    def capture_model(self) -> None:
        if not self.use_cuda_graph: